#Apple Rhizosphere 16S File Processing with DADA2
This workflow generates the otu_table input for phyloseq.
Output that we need:
1. otu_table
2. tax_table
3. sample_data, used to track the reads lost throughout the DADA2 workflow.
#install.packages("devtools")
# ---- Package setup ----
# devtools and patchwork are attached directly; the remaining packages are
# attached (and installed when missing, via install = TRUE) through pacman.
library(devtools)
## Loading required package: usethis
# devtools::install_github("thomasp85/patchwork@HEAD")
library(patchwork)
# install.packages("pacman")
library(pacman)
pacman::p_load(
  char = c(
    "tidyverse", "BiocManager", "devtools", "dada2",
    "phyloseq", "patchwork", "DT", "iNEXT", "vegan"
  ),
  install = TRUE
)

# Fix the RNG state so the random draw of sample files is reproducible.
# The leading zero is ignored by the parser; the seed value is 92819.
set.seed(092819)
# Directory that holds the raw 2022 sequencing files (fastq.gz)
raw_fastqs_path2 <- "data/01_DADA2/rawfastqs/data_2022"
print(raw_fastqs_path2)
## [1] "data/01_DADA2/rawfastqs/data_2022"
# Intuition check: peek at the first few file names in that directory
raw_fastqs_path2 %>%
  list.files() %>%
  head()
## [1] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz"
## [2] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R2.fastq.gz"
## [3] "13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz"
## [4] "13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R2.fastq.gz"
## [5] "13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz"
## [6] "13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R2.fastq.gz"
# How many files are there?
str(list.files(raw_fastqs_path2))
## chr [1:122] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz" ...
# Forward (R1) read files, returned as full paths.
# `pattern` is a regular expression, so the dots are escaped and the match is
# anchored at the end of the file name: only names that literally end in
# "R1.fastq.gz" are selected.
forward_reads2 <- list.files(
  raw_fastqs_path2,
  pattern = "R1\\.fastq\\.gz$",
  full.names = TRUE
)
# Intuition Check
head(forward_reads2)
## [1] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz"
## [2] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz"
## [3] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz"
## [4] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz"
## [5] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz"
## [6] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz"
# Reverse (R2) read files, returned as full paths.
# `pattern` is a regular expression, so the dots are escaped and the match is
# anchored at the end of the file name: only names that literally end in
# "R2.fastq.gz" are selected.
reverse_reads2 <- list.files(
  raw_fastqs_path2,
  pattern = "R2\\.fastq\\.gz$",
  full.names = TRUE
)
# Intuition Check
head(reverse_reads2)
## [1] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R2.fastq.gz"
## [2] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R2.fastq.gz"
## [3] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R2.fastq.gz"
## [4] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R2.fastq.gz"
## [5] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R2.fastq.gz"
## [6] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R2.fastq.gz"
# Randomly select (up to) 12 samples from the dataset to evaluate.
# seq_along() is safe when the vector is empty (1:length(x) would yield
# c(1, 0)), and the size is capped so that fewer than 12 files does not error.
# With 12 or more files the draw is identical to sample(1:length(x), 12).
random_samples2 <- sample(
  seq_along(reverse_reads2),
  size = min(12L, length(reverse_reads2))
)
random_samples2
## [1] 53 26 8 7 52 54 33 1 51 9 37 40
# Quality profiles of the raw reads for the randomly chosen samples
forward_plot_12_2 <- forward_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Forward Read: Raw Quality 2022")
reverse_plot_12_2 <- reverse_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Reverse Read: Raw Quality 2022")
# Show forward and reverse side by side (patchwork `+`)
forward_plot_12_2 + reverse_plot_12_2
# Aggregated pre-QC quality profiles across all raw files
# Forward reads
forward_preQC_plot2 <- forward_reads2 %>%
  plotQualityProfile(aggregate = TRUE) +
  labs(title = "Forward Pre-QC 2022")
# Reverse reads
reverse_preQC_plot2 <- reverse_reads2 %>%
  plotQualityProfile(aggregate = TRUE) +
  labs(title = "Reverse Pre-QC 2022")
# Combine forward and reverse into one figure (patchwork `+`)
preQC_aggregate_plot2 <- forward_preQC_plot2 + reverse_preQC_plot2
# Show the plot
preQC_aggregate_plot2
The quality of the 2022 reads leaves a bit to be desired. We don’t want to truncate too much: we will trim a little from the beginning of the reads, and the primer at the other end can be truncated as well. Filtering should also help the quality a bit.
# Vector of sample names, extracted from the forward-read file names.
# Each base name is split on "_" and the 7th field is taken as the sample ID
# (e.g. "B4"), then suffixed with "_2022".
# vapply() guarantees a character vector with one value per file, whereas
# sapply() can silently return a different type.
samples2 <- paste0(
  vapply(strsplit(basename(forward_reads2), "_"), `[`, character(1), 7),
  "_2022"
)
# Intuition Check
head(samples2)
## [1] "B4_2022" "G2_2022" "G10_2022" "G18_2022" "G26_2022" "G34_2022"
# Directory that will receive the filtered reads
filtered_fastqs_path2 <- "data/01_DADA2/02_filtered_fastqs_2022"
print(filtered_fastqs_path2)
## [1] "data/01_DADA2/02_filtered_fastqs_2022"
# Build the output file paths, one per sample, for each read direction
# Forward reads
filtered_forward_reads2 <- samples2 %>%
  paste0("_R1_filtered_2022.fastq.gz") %>%
  file.path(filtered_fastqs_path2, .)
length(filtered_forward_reads2)
## [1] 59
# Reverse reads
filtered_reverse_reads2 <- samples2 %>%
  paste0("_R2_filtered_2022.fastq.gz") %>%
  file.path(filtered_fastqs_path2, .)
length(filtered_reverse_reads2)
## [1] 59
## [1] 59
# Filter and trim the paired reads; the result is a table of reads in / reads
# out per input file.
#   truncLen : truncate forward/reverse reads to this length
#   trimLeft : bases removed from the start of forward/reverse reads
#   maxN     : maximum number of ambiguous bases (N) allowed
#   maxEE    : maximum expected errors allowed for forward/reverse reads
#   truncQ   : truncate a read at the first base with quality <= this score
#   rm.phix  : discard reads that match the phiX genome
filtered_reads2 <- filterAndTrim(
  fwd = forward_reads2, filt = filtered_forward_reads2,
  rev = reverse_reads2, filt.rev = filtered_reverse_reads2,
  truncLen = c(247, 247), trimLeft = c(18, 21),
  maxN = 0, maxEE = c(2, 2), truncQ = 2,
  rm.phix = TRUE, compress = TRUE,
  multithread = TRUE
)
# Quality profiles of the same random samples after filtering and trimming
forward_filteredQual_plot_12_2 <- filtered_forward_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Trimmed Forward Read Quality 2022")
reverse_filteredQual_plot_12_2 <- filtered_reverse_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Trimmed Reverse Read Quality 2022")
# Show forward and reverse side by side (patchwork `+`)
forward_filteredQual_plot_12_2 + reverse_filteredQual_plot_12_2
# Aggregated post-QC quality profiles across all filtered files
# Forward reads
forward_postQC_plot2 <- filtered_forward_reads2 %>%
  plotQualityProfile(aggregate = TRUE) +
  labs(title = "Forward Post-QC 2022")
# Reverse reads
reverse_postQC_plot2 <- filtered_reverse_reads2 %>%
  plotQualityProfile(aggregate = TRUE) +
  labs(title = "Reverse Post-QC 2022")
# Combine forward and reverse into one figure (patchwork `+`)
postQC_aggregate_plot2 <- forward_postQC_plot2 + reverse_postQC_plot2
# Show the plot
postQC_aggregate_plot2
Here, we see that the sequence quality has improved. The very low quality that we saw in the pre-QC plots is mostly gone. There is still some low quality towards the end of the reads, especially the reverse reads, but it will hopefully overlap with the good-quality sections of the opposite read.
# filterAndTrim: make output into dataframe
# (A bare `filterAndTrim` symbol at the start of this line would have been
# evaluated and printed the function's source, so it now lives in the comment.)
filtered_df2 <- as.data.frame(filtered_reads2)
head(filtered_df2)
## reads.in
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 204642
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 175656
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 138589
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 202865
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 202611
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 188583
## reads.out
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 112436
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 109166
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 77962
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 119856
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 123024
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 117249
# Summary statistics of the filtering step, returned as a one-row data frame.
# NOTE(review): median_percent_retained2 is the ratio of the two medians
# (median reads out / median reads in), expressed as a proportion; it is not
# the median of the per-sample retention ratios.
with(
  filtered_df2,
  data.frame(
    median_reads_in2 = median(reads.in),
    median_reads_out2 = median(reads.out),
    median_percent_retained2 = median(reads.out) / median(reads.in)
  )
)
## median_reads_in2 median_reads_out2 median_percent_retained2
## 1 182525 109166 0.5980879
About 60% of the reads were retained; this seems sufficient since sequencing depth was quite high.
# Plot the pre and post together in one plot
# patchwork's `/` operator stacks the pre-QC aggregate figure on top of the
# post-QC aggregate figure so the two can be compared directly.
preQC_aggregate_plot2 / postQC_aggregate_plot2
# Forward reads: learn the error model from the filtered forward reads
error_forward_reads2 <-
  learnErrors(filtered_forward_reads2, multithread = 5)
## 124219676 total bases in 542444 reads from 5 samples will be used for learning the error rates.
# Plot Forward
# nominalQ is a logical flag (draw the error rates expected under the nominal
# Q-score definition as a red line), so pass TRUE rather than a number.
forward_error_plot2 <-
  plotErrors(error_forward_reads2, nominalQ = TRUE) +
  labs(title = "Forward Read Error Model 2022")
# Reverse reads: learn the error model from the filtered reverse reads
error_reverse_reads2 <-
  learnErrors(filtered_reverse_reads2, multithread = 5)
## 122592344 total bases in 542444 reads from 5 samples will be used for learning the error rates.
# Plot reverse
# nominalQ is a logical flag (draw the error rates expected under the nominal
# Q-score definition as a red line), so pass TRUE rather than a number.
reverse_error_plot2 <-
  plotErrors(error_reverse_reads2, nominalQ = TRUE) +
  labs(title = "Reverse Read Error Model 2022")
# Put the two plots together
forward_error_plot2 + reverse_error_plot2
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
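The black lines (estimated error rates) follow the points (observed error rates) closely, and the red lines (error rates expected under the nominal Q-score definition) slope downward: error rates decrease as the quality score increases.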
An important note: ASV inference with dada() occurs separately on the forward and reverse reads! This is quite a different approach from how OTUs are identified in Mothur and also from UCHIME, oligotyping, and other OTU, MED, and ASV approaches.
# Denoise the filtered forward reads into ASVs using the forward error model
dada_forward2 <- dada(
  derep = filtered_forward_reads2,
  err = error_forward_reads2,
  multithread = TRUE
)
## Sample 1 - 112436 reads in 68956 unique sequences.
## Sample 2 - 109166 reads in 49976 unique sequences.
## Sample 3 - 77962 reads in 45012 unique sequences.
## Sample 4 - 119856 reads in 67776 unique sequences.
## Sample 5 - 123024 reads in 71091 unique sequences.
## Sample 6 - 117249 reads in 56999 unique sequences.
## Sample 7 - 11991 reads in 7335 unique sequences.
## Sample 8 - 122578 reads in 71834 unique sequences.
## Sample 9 - 129923 reads in 58317 unique sequences.
## Sample 10 - 619 reads in 165 unique sequences.
## Sample 11 - 121345 reads in 64854 unique sequences.
## Sample 12 - 122765 reads in 67515 unique sequences.
## Sample 13 - 124955 reads in 71659 unique sequences.
## Sample 14 - 135315 reads in 76111 unique sequences.
## Sample 15 - 89941 reads in 53170 unique sequences.
## Sample 16 - 32169 reads in 23790 unique sequences.
## Sample 17 - 95121 reads in 48112 unique sequences.
## Sample 18 - 37661 reads in 25718 unique sequences.
## Sample 19 - 121260 reads in 51392 unique sequences.
## Sample 20 - 86351 reads in 51126 unique sequences.
## Sample 21 - 121137 reads in 69518 unique sequences.
## Sample 22 - 77310 reads in 51780 unique sequences.
## Sample 23 - 107076 reads in 58551 unique sequences.
## Sample 24 - 115796 reads in 46192 unique sequences.
## Sample 25 - 121997 reads in 54600 unique sequences.
## Sample 26 - 81300 reads in 31371 unique sequences.
## Sample 27 - 1814 reads in 1268 unique sequences.
## Sample 28 - 103325 reads in 54054 unique sequences.
## Sample 29 - 67069 reads in 42886 unique sequences.
## Sample 30 - 112150 reads in 67588 unique sequences.
## Sample 31 - 101341 reads in 49967 unique sequences.
## Sample 32 - 108028 reads in 43995 unique sequences.
## Sample 33 - 69305 reads in 37780 unique sequences.
## Sample 34 - 119851 reads in 62978 unique sequences.
## Sample 35 - 114951 reads in 72340 unique sequences.
## Sample 36 - 79183 reads in 39694 unique sequences.
## Sample 37 - 95044 reads in 54630 unique sequences.
## Sample 38 - 110978 reads in 65018 unique sequences.
## Sample 39 - 112801 reads in 57948 unique sequences.
## Sample 40 - 85656 reads in 45822 unique sequences.
## Sample 41 - 106142 reads in 43993 unique sequences.
## Sample 42 - 110276 reads in 51369 unique sequences.
## Sample 43 - 48560 reads in 39016 unique sequences.
## Sample 44 - 85342 reads in 49813 unique sequences.
## Sample 45 - 69813 reads in 35459 unique sequences.
## Sample 46 - 73527 reads in 46708 unique sequences.
## Sample 47 - 104282 reads in 43524 unique sequences.
## Sample 48 - 126156 reads in 64364 unique sequences.
## Sample 49 - 44508 reads in 32424 unique sequences.
## Sample 50 - 140122 reads in 59219 unique sequences.
## Sample 51 - 85663 reads in 61164 unique sequences.
## Sample 52 - 123554 reads in 77923 unique sequences.
## Sample 53 - 125520 reads in 65871 unique sequences.
## Sample 54 - 109189 reads in 51382 unique sequences.
## Sample 55 - 134427 reads in 68129 unique sequences.
## Sample 56 - 153905 reads in 58591 unique sequences.
## Sample 57 - 136558 reads in 80163 unique sequences.
## Sample 58 - 114312 reads in 67788 unique sequences.
## Sample 59 - 51238 reads in 32244 unique sequences.
typeof(dada_forward2)
## [1] "list"
# Grab a sample and look at it.
# Index by position: the list elements are named after the filtered fastq
# files of this dataset, so `$` with a sample name from a different dataset
# silently returns NULL instead of a dada-class object.
dada_forward2[[1]]
# Denoise the filtered reverse reads into ASVs using the reverse error model
dada_reverse2 <- dada(
  derep = filtered_reverse_reads2,
  err = error_reverse_reads2,
  multithread = TRUE
)
## Sample 1 - 112436 reads in 62904 unique sequences.
## Sample 2 - 109166 reads in 43644 unique sequences.
## Sample 3 - 77962 reads in 46477 unique sequences.
## Sample 4 - 119856 reads in 62905 unique sequences.
## Sample 5 - 123024 reads in 67702 unique sequences.
## Sample 6 - 117249 reads in 49104 unique sequences.
## Sample 7 - 11991 reads in 7046 unique sequences.
## Sample 8 - 122578 reads in 65918 unique sequences.
## Sample 9 - 129923 reads in 54639 unique sequences.
## Sample 10 - 619 reads in 188 unique sequences.
## Sample 11 - 121345 reads in 57639 unique sequences.
## Sample 12 - 122765 reads in 60449 unique sequences.
## Sample 13 - 124955 reads in 75466 unique sequences.
## Sample 14 - 135315 reads in 81230 unique sequences.
## Sample 15 - 89941 reads in 45611 unique sequences.
## Sample 16 - 32169 reads in 20126 unique sequences.
## Sample 17 - 95121 reads in 38069 unique sequences.
## Sample 18 - 37661 reads in 22551 unique sequences.
## Sample 19 - 121260 reads in 49695 unique sequences.
## Sample 20 - 86351 reads in 48349 unique sequences.
## Sample 21 - 121137 reads in 60617 unique sequences.
## Sample 22 - 77310 reads in 40097 unique sequences.
## Sample 23 - 107076 reads in 52423 unique sequences.
## Sample 24 - 115796 reads in 39571 unique sequences.
## Sample 25 - 121997 reads in 50916 unique sequences.
## Sample 26 - 81300 reads in 30494 unique sequences.
## Sample 27 - 1814 reads in 1356 unique sequences.
## Sample 28 - 103325 reads in 43595 unique sequences.
## Sample 29 - 67069 reads in 35188 unique sequences.
## Sample 30 - 112150 reads in 59493 unique sequences.
## Sample 31 - 101341 reads in 50983 unique sequences.
## Sample 32 - 108028 reads in 37936 unique sequences.
## Sample 33 - 69305 reads in 26784 unique sequences.
## Sample 34 - 119851 reads in 64945 unique sequences.
## Sample 35 - 114951 reads in 67909 unique sequences.
## Sample 36 - 79183 reads in 35456 unique sequences.
## Sample 37 - 95044 reads in 48762 unique sequences.
## Sample 38 - 110978 reads in 64573 unique sequences.
## Sample 39 - 112801 reads in 52600 unique sequences.
## Sample 40 - 85656 reads in 46717 unique sequences.
## Sample 41 - 106142 reads in 39242 unique sequences.
## Sample 42 - 110276 reads in 48991 unique sequences.
## Sample 43 - 48560 reads in 31079 unique sequences.
## Sample 44 - 85342 reads in 48943 unique sequences.
## Sample 45 - 69813 reads in 27702 unique sequences.
## Sample 46 - 73527 reads in 36793 unique sequences.
## Sample 47 - 104282 reads in 54104 unique sequences.
## Sample 48 - 126156 reads in 58456 unique sequences.
## Sample 49 - 44508 reads in 27121 unique sequences.
## Sample 50 - 140122 reads in 60041 unique sequences.
## Sample 51 - 85663 reads in 51586 unique sequences.
## Sample 52 - 123554 reads in 66618 unique sequences.
## Sample 53 - 125520 reads in 54953 unique sequences.
## Sample 54 - 109189 reads in 55607 unique sequences.
## Sample 55 - 134427 reads in 62970 unique sequences.
## Sample 56 - 153905 reads in 52688 unique sequences.
## Sample 57 - 136558 reads in 69771 unique sequences.
## Sample 58 - 114312 reads in 66585 unique sequences.
## Sample 59 - 51238 reads in 30239 unique sequences.
# Inspect the reverse-read denoising results for two samples.
# Single-bracket indexing keeps the list wrapper, so the element name (the
# filtered reverse-read file name) is printed together with the dada-class
# summary.
# First sample:
dada_reverse2[1]
## $B4_2022_R2_filtered_2022.fastq.gz
## dada-class: object describing DADA2 denoising results
## 2290 sequence variants were inferred from 62904 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
# Thirtieth sample:
dada_reverse2[30]
## $G6_2022_R2_filtered_2022.fastq.gz
## dada-class: object describing DADA2 denoising results
## 2616 sequence variants were inferred from 59493 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
Now, merge the forward and reverse ASVs into contigs.
# Merge the denoised forward and reverse reads of each sample into contigs;
# verbose = TRUE reports, per sample, how many read pairs merged successfully.
merged_ASVs2 <- mergePairs(
  dadaF = dada_forward2, derepF = filtered_forward_reads2,
  dadaR = dada_reverse2, derepR = filtered_reverse_reads2,
  verbose = TRUE
)
## 49650 paired-reads (in 4771 unique pairings) successfully merged out of 93414 (in 31589 pairings) input.
## 70513 paired-reads (in 5531 unique pairings) successfully merged out of 97239 (in 19859 pairings) input.
## 39986 paired-reads (in 3152 unique pairings) successfully merged out of 65963 (in 18724 pairings) input.
## 56269 paired-reads (in 6117 unique pairings) successfully merged out of 99115 (in 31676 pairings) input.
## 54587 paired-reads (in 6137 unique pairings) successfully merged out of 100466 (in 33930 pairings) input.
## 66448 paired-reads (in 4772 unique pairings) successfully merged out of 101369 (in 23076 pairings) input.
## 5274 paired-reads (in 660 unique pairings) successfully merged out of 8664 (in 2466 pairings) input.
## 55043 paired-reads (in 6267 unique pairings) successfully merged out of 100011 (in 33825 pairings) input.
## 82776 paired-reads (in 7591 unique pairings) successfully merged out of 114502 (in 25264 pairings) input.
## 618 paired-reads (in 8 unique pairings) successfully merged out of 618 (in 8 pairings) input.
## 64451 paired-reads (in 6342 unique pairings) successfully merged out of 103929 (in 28741 pairings) input.
## 61765 paired-reads (in 6656 unique pairings) successfully merged out of 102457 (in 30888 pairings) input.
## 55552 paired-reads (in 5780 unique pairings) successfully merged out of 102258 (in 33984 pairings) input.
## 62643 paired-reads (in 6976 unique pairings) successfully merged out of 111672 (in 37170 pairings) input.
## 41554 paired-reads (in 4395 unique pairings) successfully merged out of 74171 (in 24234 pairings) input.
## 13615 paired-reads (in 893 unique pairings) successfully merged out of 25688 (in 7939 pairings) input.
## 61198 paired-reads (in 5069 unique pairings) successfully merged out of 84914 (in 18730 pairings) input.
## 17508 paired-reads (in 1266 unique pairings) successfully merged out of 31038 (in 9579 pairings) input.
## 82943 paired-reads (in 6255 unique pairings) successfully merged out of 109246 (in 20489 pairings) input.
## 39592 paired-reads (in 4085 unique pairings) successfully merged out of 70406 (in 22361 pairings) input.
## 60408 paired-reads (in 6170 unique pairings) successfully merged out of 102557 (in 31510 pairings) input.
## 33875 paired-reads (in 3462 unique pairings) successfully merged out of 63589 (in 22540 pairings) input.
## 53826 paired-reads (in 4966 unique pairings) successfully merged out of 89975 (in 25543 pairings) input.
## 85572 paired-reads (in 7501 unique pairings) successfully merged out of 106644 (in 18745 pairings) input.
## 80055 paired-reads (in 6355 unique pairings) successfully merged out of 109653 (in 22786 pairings) input.
## 59250 paired-reads (in 5216 unique pairings) successfully merged out of 74224 (in 13035 pairings) input.
## 1075 paired-reads (in 36 unique pairings) successfully merged out of 1352 (in 121 pairings) input.
## 62363 paired-reads (in 5711 unique pairings) successfully merged out of 92399 (in 22319 pairings) input.
## 30545 paired-reads (in 2929 unique pairings) successfully merged out of 55120 (in 18017 pairings) input.
## 50796 paired-reads (in 5103 unique pairings) successfully merged out of 92042 (in 30564 pairings) input.
## 63142 paired-reads (in 5060 unique pairings) successfully merged out of 90488 (in 20184 pairings) input.
## 75445 paired-reads (in 6064 unique pairings) successfully merged out of 98037 (in 17698 pairings) input.
## 42754 paired-reads (in 3704 unique pairings) successfully merged out of 61015 (in 14531 pairings) input.
## 62187 paired-reads (in 6507 unique pairings) successfully merged out of 101601 (in 28948 pairings) input.
## 45301 paired-reads (in 5811 unique pairings) successfully merged out of 91508 (in 34654 pairings) input.
## 46754 paired-reads (in 4188 unique pairings) successfully merged out of 67863 (in 16343 pairings) input.
## 44739 paired-reads (in 4367 unique pairings) successfully merged out of 79101 (in 24920 pairings) input.
## 49601 paired-reads (in 5039 unique pairings) successfully merged out of 90384 (in 29447 pairings) input.
## 63676 paired-reads (in 6111 unique pairings) successfully merged out of 96954 (in 25501 pairings) input.
## 43901 paired-reads (in 3798 unique pairings) successfully merged out of 71567 (in 19336 pairings) input.
## 72987 paired-reads (in 5562 unique pairings) successfully merged out of 95548 (in 17666 pairings) input.
## 67906 paired-reads (in 5937 unique pairings) successfully merged out of 97473 (in 21918 pairings) input.
## 15568 paired-reads (in 1878 unique pairings) successfully merged out of 37812 (in 16890 pairings) input.
## 41296 paired-reads (in 3835 unique pairings) successfully merged out of 70701 (in 21340 pairings) input.
## 44695 paired-reads (in 3650 unique pairings) successfully merged out of 63081 (in 13532 pairings) input.
## 36295 paired-reads (in 3069 unique pairings) successfully merged out of 62420 (in 19313 pairings) input.
## 71774 paired-reads (in 5764 unique pairings) successfully merged out of 94056 (in 18039 pairings) input.
## 70898 paired-reads (in 6675 unique pairings) successfully merged out of 109498 (in 28811 pairings) input.
## 18602 paired-reads (in 1430 unique pairings) successfully merged out of 36115 (in 12402 pairings) input.
## 95770 paired-reads (in 8678 unique pairings) successfully merged out of 128013 (in 25530 pairings) input.
## 30090 paired-reads (in 3784 unique pairings) successfully merged out of 67193 (in 28610 pairings) input.
## 50808 paired-reads (in 5754 unique pairings) successfully merged out of 100377 (in 36909 pairings) input.
## 69617 paired-reads (in 6588 unique pairings) successfully merged out of 107950 (in 28278 pairings) input.
## 64571 paired-reads (in 5401 unique pairings) successfully merged out of 95653 (in 21409 pairings) input.
## 78036 paired-reads (in 6930 unique pairings) successfully merged out of 117539 (in 29913 pairings) input.
## 107997 paired-reads (in 7429 unique pairings) successfully merged out of 140275 (in 24291 pairings) input.
## 59170 paired-reads (in 6853 unique pairings) successfully merged out of 110322 (in 38002 pairings) input.
## 50333 paired-reads (in 5647 unique pairings) successfully merged out of 93645 (in 31975 pairings) input.
## 21459 paired-reads (in 2606 unique pairings) successfully merged out of 39700 (in 13566 pairings) input.
# Evaluate the output
# Storage type of the merge result
typeof(merged_ASVs2)
## [1] "list"
# Number of list elements (one per sample)
length(merged_ASVs2)
## [1] 59
# Element names: these are the filtered forward-read file names
names(merged_ASVs2)
## [1] "B4_2022_R1_filtered_2022.fastq.gz" "G2_2022_R1_filtered_2022.fastq.gz"
## [3] "G10_2022_R1_filtered_2022.fastq.gz" "G18_2022_R1_filtered_2022.fastq.gz"
## [5] "G26_2022_R1_filtered_2022.fastq.gz" "G34_2022_R1_filtered_2022.fastq.gz"
## [7] "V2_2022_R1_filtered_2022.fastq.gz" "B5_2022_R1_filtered_2022.fastq.gz"
## [9] "G3_2022_R1_filtered_2022.fastq.gz" "G11_2022_R1_filtered_2022.fastq.gz"
## [11] "G19_2022_R1_filtered_2022.fastq.gz" "G27_2022_R1_filtered_2022.fastq.gz"
## [13] "G35_2022_R1_filtered_2022.fastq.gz" "V3_2022_R1_filtered_2022.fastq.gz"
## [15] "B6_2022_R1_filtered_2022.fastq.gz" "G4_2022_R1_filtered_2022.fastq.gz"
## [17] "G12_2022_R1_filtered_2022.fastq.gz" "G20_2022_R1_filtered_2022.fastq.gz"
## [19] "G28_2022_R1_filtered_2022.fastq.gz" "M1_2022_R1_filtered_2022.fastq.gz"
## [21] "V4_2022_R1_filtered_2022.fastq.gz" "B7_2022_R1_filtered_2022.fastq.gz"
## [23] "G5_2022_R1_filtered_2022.fastq.gz" "G13_2022_R1_filtered_2022.fastq.gz"
## [25] "G21_2022_R1_filtered_2022.fastq.gz" "G29_2022_R1_filtered_2022.fastq.gz"
## [27] "M2_2022_R1_filtered_2022.fastq.gz" "V5_2022_R1_filtered_2022.fastq.gz"
## [29] "B8_2022_R1_filtered_2022.fastq.gz" "G6_2022_R1_filtered_2022.fastq.gz"
## [31] "G14_2022_R1_filtered_2022.fastq.gz" "G22_2022_R1_filtered_2022.fastq.gz"
## [33] "G30_2022_R1_filtered_2022.fastq.gz" "M3_2022_R1_filtered_2022.fastq.gz"
## [35] "S1_2022_R1_filtered_2022.fastq.gz" "B1_2022_R1_filtered_2022.fastq.gz"
## [37] "B9_2022_R1_filtered_2022.fastq.gz" "G7_2022_R1_filtered_2022.fastq.gz"
## [39] "G15_2022_R1_filtered_2022.fastq.gz" "G23_2022_R1_filtered_2022.fastq.gz"
## [41] "G31_2022_R1_filtered_2022.fastq.gz" "M4_2022_R1_filtered_2022.fastq.gz"
## [43] "S2_2022_R1_filtered_2022.fastq.gz" "B2_2022_R1_filtered_2022.fastq.gz"
## [45] "B10_2022_R1_filtered_2022.fastq.gz" "G8_2022_R1_filtered_2022.fastq.gz"
## [47] "G16_2022_R1_filtered_2022.fastq.gz" "G24_2022_R1_filtered_2022.fastq.gz"
## [49] "G32_2022_R1_filtered_2022.fastq.gz" "M5_2022_R1_filtered_2022.fastq.gz"
## [51] "S3_2022_R1_filtered_2022.fastq.gz" "B3_2022_R1_filtered_2022.fastq.gz"
## [53] "G1_2022_R1_filtered_2022.fastq.gz" "G9_2022_R1_filtered_2022.fastq.gz"
## [55] "G17_2022_R1_filtered_2022.fastq.gz" "G25_2022_R1_filtered_2022.fastq.gz"
## [57] "G33_2022_R1_filtered_2022.fastq.gz" "V1_2022_R1_filtered_2022.fastq.gz"
## [59] "S4_2022_R1_filtered_2022.fastq.gz"
# Inspect the merger data.frame of the third sample
# (G10_2022_R1_filtered_2022.fastq.gz, per names(merged_ASVs2))
head(merged_ASVs2[[3]])
## sequence
## 1 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 2 GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 3 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTGGTGTCTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 4 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 5 GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTGGTGTCTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## abundance forward reverse nmatch nmismatch nindel prefer accept
## 1 2024 1 1 29 0 0 2 TRUE
## 2 1499 5 3 49 0 0 2 TRUE
## 3 1330 2 2 29 0 0 1 TRUE
## 4 1037 3 1 29 0 0 2 TRUE
## 5 1028 7 3 49 0 0 2 TRUE
## 6 1026 2 1 29 0 0 2 TRUE
# Print the full structure of the merge result: one data.frame per sample,
# with one row per merged sequence.
str(merged_ASVs2)
## List of 59
## $ B4_2022_R1_filtered_2022.fastq.gz :'data.frame': 4771 obs. of 9 variables:
## ..$ sequence : chr [1:4771] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:4771] 953 624 609 603 513 479 468 459 390 347 ...
## ..$ forward : int [1:4771] 1 3 5 2 6 8 4 7 10 13 ...
## ..$ reverse : int [1:4771] 1 2 4 2 9 8 5 7 6 19 ...
## ..$ nmatch : int [1:4771] 29 49 54 49 29 54 29 54 29 49 ...
## ..$ nmismatch: int [1:4771] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4771] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4771] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:4771] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G2_2022_R1_filtered_2022.fastq.gz :'data.frame': 5531 obs. of 9 variables:
## ..$ sequence : chr [1:5531] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AAGGAATATTGGTCAATGGAGGCAACTCTGAACCAGCCATGCCGCGTGCAGGAAGACTGCCCTATGGGTTGTAAACTGCTTTTATCCGGGAATAAACCACATTACGTGTAA"| __truncated__ ...
## ..$ abundance: int [1:5531] 4162 2924 1935 1612 1573 1387 884 871 861 833 ...
## ..$ forward : int [1:5531] 2 1 3 5 6 1 4 9 8 11 ...
## ..$ reverse : int [1:5531] 1 2 4 3 1 6 2 10 1 3 ...
## ..$ nmatch : int [1:5531] 29 29 54 34 29 29 29 34 29 34 ...
## ..$ nmismatch: int [1:5531] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5531] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5531] 2 1 2 2 2 1 2 2 2 2 ...
## ..$ accept : logi [1:5531] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G10_2022_R1_filtered_2022.fastq.gz:'data.frame': 3152 obs. of 9 variables:
## ..$ sequence : chr [1:3152] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:3152] 2024 1499 1330 1037 1028 1026 637 601 548 320 ...
## ..$ forward : int [1:3152] 1 5 2 3 7 2 4 6 8 1 ...
## ..$ reverse : int [1:3152] 1 3 2 1 3 1 5 4 2 2 ...
## ..$ nmatch : int [1:3152] 29 49 29 29 49 29 29 29 29 29 ...
## ..$ nmismatch: int [1:3152] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3152] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3152] 2 2 1 2 2 2 1 2 2 1 ...
## ..$ accept : logi [1:3152] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G18_2022_R1_filtered_2022.fastq.gz:'data.frame': 6117 obs. of 9 variables:
## ..$ sequence : chr [1:6117] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:6117] 1361 1334 1120 998 671 630 509 395 393 392 ...
## ..$ forward : int [1:6117] 3 4 2 6 5 1 1 10 11 7 ...
## ..$ reverse : int [1:6117] 3 2 1 2 5 8 7 9 10 1 ...
## ..$ nmatch : int [1:6117] 54 49 29 49 29 29 29 29 54 29 ...
## ..$ nmismatch: int [1:6117] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6117] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6117] 2 2 2 2 2 1 1 2 2 2 ...
## ..$ accept : logi [1:6117] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G26_2022_R1_filtered_2022.fastq.gz:'data.frame': 6137 obs. of 9 variables:
## ..$ sequence : chr [1:6137] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
## ..$ abundance: int [1:6137] 2388 1035 754 701 457 417 410 378 377 327 ...
## ..$ forward : int [1:6137] 1 2 3 4 5 9 8 13 12 7 ...
## ..$ reverse : int [1:6137] 1 2 4 5 6 6 7 11 8 13 ...
## ..$ nmatch : int [1:6137] 54 29 29 29 49 49 54 54 54 54 ...
## ..$ nmismatch: int [1:6137] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6137] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6137] 2 2 2 2 2 2 2 2 2 1 ...
## ..$ accept : logi [1:6137] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G34_2022_R1_filtered_2022.fastq.gz:'data.frame': 4772 obs. of 9 variables:
## ..$ sequence : chr [1:4772] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCACGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
## ..$ abundance: int [1:4772] 21896 1000 826 761 644 533 517 492 419 374 ...
## ..$ forward : int [1:4772] 1 1 2 3 4 5 6 33 7 8 ...
## ..$ reverse : int [1:4772] 1 4 3 1 2 3 5 1 5 3 ...
## ..$ nmatch : int [1:4772] 54 54 29 54 29 29 49 54 49 54 ...
## ..$ nmismatch: int [1:4772] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4772] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4772] 2 1 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:4772] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ V2_2022_R1_filtered_2022.fastq.gz :'data.frame': 660 obs. of 9 variables:
## ..$ sequence : chr [1:660] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
## ..$ abundance: int [1:660] 661 142 120 107 99 73 68 60 51 51 ...
## ..$ forward : int [1:660] 1 2 3 4 6 197 7 8 85 104 ...
## ..$ reverse : int [1:660] 1 3 2 5 4 4 2 8 19 13 ...
## ..$ nmatch : int [1:660] 54 29 29 29 49 49 29 54 49 54 ...
## ..$ nmismatch: int [1:660] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:660] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:660] 2 1 2 2 2 2 2 2 1 2 ...
## ..$ accept : logi [1:660] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B5_2022_R1_filtered_2022.fastq.gz :'data.frame': 6267 obs. of 9 variables:
## ..$ sequence : chr [1:6267] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
## ..$ abundance: int [1:6267] 1789 724 656 480 404 383 311 297 275 263 ...
## ..$ forward : int [1:6267] 1 3 2 4 1 1 6 5 13 8 ...
## ..$ reverse : int [1:6267] 1 3 3 4 7 5 2 2 12 8 ...
## ..$ nmatch : int [1:6267] 29 49 49 29 29 29 29 29 49 29 ...
## ..$ nmismatch: int [1:6267] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6267] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6267] 1 2 2 2 1 1 2 2 2 2 ...
## ..$ accept : logi [1:6267] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G3_2022_R1_filtered_2022.fastq.gz :'data.frame': 7591 obs. of 9 variables:
## ..$ sequence : chr [1:7591] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:7591] 5748 3968 1611 1277 1242 1057 834 819 786 723 ...
## ..$ forward : int [1:7591] 1 2 3 6 5 7 1 8 11 4 ...
## ..$ reverse : int [1:7591] 1 1 2 4 1 4 5 3 2 6 ...
## ..$ nmatch : int [1:7591] 29 29 29 49 29 49 29 29 29 29 ...
## ..$ nmismatch: int [1:7591] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:7591] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:7591] 2 2 2 2 2 2 1 2 2 1 ...
## ..$ accept : logi [1:7591] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G11_2022_R1_filtered_2022.fastq.gz:'data.frame': 8 obs. of 9 variables:
## ..$ sequence : chr [1:8] "GGGGAATTTTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTTGTCCGGAAAGAAAACTTCTGGGTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGGAGTAAAGTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTCGGGTTGTAAAGCCCTTTTGTCCGGAACGAAAAGCGATCGGTTAATAC"| __truncated__ "GGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCGGCCGGAACGAAATCGCGCGGGCGAATAT"| __truncated__ ...
## ..$ abundance: int [1:8] 263 156 77 59 35 17 8 3
## ..$ forward : int [1:8] 1 2 3 4 5 6 8 7
## ..$ reverse : int [1:8] 1 2 3 4 5 6 7 8
## ..$ nmatch : int [1:8] 29 29 29 29 28 49 29 53
## ..$ nmismatch: int [1:8] 0 0 0 0 0 0 0 0
## ..$ nindel : int [1:8] 0 0 0 0 0 0 0 0
## ..$ prefer : num [1:8] 1 1 1 2 2 1 2 1
## ..$ accept : logi [1:8] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G19_2022_R1_filtered_2022.fastq.gz:'data.frame': 6342 obs. of 9 variables:
## ..$ sequence : chr [1:6342] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
## ..$ abundance: int [1:6342] 3282 1904 1778 1351 778 703 615 578 546 540 ...
## ..$ forward : int [1:6342] 1 3 4 2 8 7 10 6 11 9 ...
## ..$ reverse : int [1:6342] 1 2 1 3 4 4 1 7 1 5 ...
## ..$ nmatch : int [1:6342] 29 54 29 29 49 49 29 29 29 29 ...
## ..$ nmismatch: int [1:6342] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6342] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6342] 2 2 2 1 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6342] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G27_2022_R1_filtered_2022.fastq.gz:'data.frame': 6656 obs. of 9 variables:
## ..$ sequence : chr [1:6656] "GGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGAAGAAGGCCTTCGGGTTGTAAAGCTCTTTTGTCAGGGAAGAAACGGTGAGAGCTAATAT"| __truncated__ "GGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGAAGAAGGCCTTCGGGTTGTAAAGCTCTTTTGTCAGGGAAGAAACGGTGAGAGCTAATAT"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:6656] 2958 1782 895 798 628 574 555 515 452 420 ...
## ..$ forward : int [1:6656] 1 2 6 5 3 4 10 7 13 14 ...
## ..$ reverse : int [1:6656] 1 1 3 3 4 6 11 2 12 10 ...
## ..$ nmatch : int [1:6656] 29 29 49 49 29 29 49 29 29 29 ...
## ..$ nmismatch: int [1:6656] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6656] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6656] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6656] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G35_2022_R1_filtered_2022.fastq.gz:'data.frame': 5780 obs. of 9 variables:
## ..$ sequence : chr [1:5780] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
## ..$ abundance: int [1:5780] 2781 835 795 384 382 375 352 350 261 237 ...
## ..$ forward : int [1:5780] 1 3 2 5 6 7 8 9 4 13 ...
## ..$ reverse : int [1:5780] 1 2 2 5 3 12 4 8 6 10 ...
## ..$ nmatch : int [1:5780] 54 49 49 29 29 49 54 54 29 54 ...
## ..$ nmismatch: int [1:5780] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5780] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5780] 2 2 2 1 2 1 2 2 1 2 ...
## ..$ accept : logi [1:5780] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ V3_2022_R1_filtered_2022.fastq.gz :'data.frame': 6976 obs. of 9 variables:
## ..$ sequence : chr [1:6976] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAATTAATAC"| __truncated__ ...
## ..$ abundance: int [1:6976] 1334 1003 843 793 635 590 550 539 511 462 ...
## ..$ forward : int [1:6976] 1 5 6 3 2 7 2 4 8 11 ...
## ..$ reverse : int [1:6976] 1 4 4 2 6 7 5 5 9 8 ...
## ..$ nmatch : int [1:6976] 29 49 49 29 29 29 29 29 54 29 ...
## ..$ nmismatch: int [1:6976] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6976] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6976] 2 2 2 2 1 1 1 2 2 2 ...
## ..$ accept : logi [1:6976] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B6_2022_R1_filtered_2022.fastq.gz :'data.frame': 4395 obs. of 9 variables:
## ..$ sequence : chr [1:4395] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGAGTGATGAAGGCCCTAGGGTTGTAAAGCTCTTTCACCGGAGAAGATAATGACGGTATCCGGAG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:4395] 934 772 723 657 593 591 392 323 310 259 ...
## ..$ forward : int [1:4395] 2 3 1 6 5 4 1 8 9 11 ...
## ..$ reverse : int [1:4395] 4 3 1 2 2 6 7 5 8 9 ...
## ..$ nmatch : int [1:4395] 54 54 29 49 49 29 29 29 29 54 ...
## ..$ nmismatch: int [1:4395] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4395] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4395] 2 2 2 2 2 2 1 2 2 2 ...
## ..$ accept : logi [1:4395] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G4_2022_R1_filtered_2022.fastq.gz :'data.frame': 893 obs. of 9 variables:
## ..$ sequence : chr [1:893] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
## ..$ abundance: int [1:893] 2032 498 278 266 255 231 209 204 194 189 ...
## ..$ forward : int [1:893] 1 2 81 6 3 9 7 4 10 11 ...
## ..$ reverse : int [1:893] 1 3 7 2 4 7 2 6 2 8 ...
## ..$ nmatch : int [1:893] 54 29 49 29 29 49 29 29 29 29 ...
## ..$ nmismatch: int [1:893] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:893] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:893] 1 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:893] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G12_2022_R1_filtered_2022.fastq.gz:'data.frame': 5069 obs. of 9 variables:
## ..$ sequence : chr [1:5069] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5069] 4267 3214 2452 1749 1196 911 822 799 789 610 ...
## ..$ forward : int [1:5069] 1 4 3 2 6 5 9 8 2 14 ...
## ..$ reverse : int [1:5069] 1 1 2 2 1 5 6 6 3 8 ...
## ..$ nmatch : int [1:5069] 29 29 29 29 29 29 49 49 29 34 ...
## ..$ nmismatch: int [1:5069] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5069] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5069] 2 2 2 2 2 2 2 2 1 2 ...
## ..$ accept : logi [1:5069] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G20_2022_R1_filtered_2022.fastq.gz:'data.frame': 1266 obs. of 9 variables:
## ..$ sequence : chr [1:1266] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:1266] 1203 393 374 335 300 294 290 278 202 195 ...
## ..$ forward : int [1:1266] 1 1 3 6 1 7 5 4 8 2 ...
## ..$ reverse : int [1:1266] 1 3 5 6 4 7 6 2 2 3 ...
## ..$ nmatch : int [1:1266] 29 29 29 49 29 54 49 29 29 29 ...
## ..$ nmismatch: int [1:1266] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:1266] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:1266] 1 1 2 2 1 2 2 2 2 2 ...
## ..$ accept : logi [1:1266] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G28_2022_R1_filtered_2022.fastq.gz:'data.frame': 6255 obs. of 9 variables:
## ..$ sequence : chr [1:6255] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACACTCCTATGTATAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:6255] 8112 4181 2354 2039 1892 1601 1567 1365 967 792 ...
## ..$ forward : int [1:6255] 1 2 4 5 3 6 7 8 10 13 ...
## ..$ reverse : int [1:6255] 1 2 1 1 3 1 1 6 4 1 ...
## ..$ nmatch : int [1:6255] 29 34 29 29 29 29 29 54 34 29 ...
## ..$ nmismatch: int [1:6255] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6255] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6255] 2 2 2 2 1 2 2 2 2 2 ...
## ..$ accept : logi [1:6255] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ M1_2022_R1_filtered_2022.fastq.gz :'data.frame': 4085 obs. of 9 variables:
## ..$ sequence : chr [1:4085] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:4085] 1360 806 637 467 310 310 306 266 259 253 ...
## ..$ forward : int [1:4085] 1 2 3 4 7 6 10 43 8 14 ...
## ..$ reverse : int [1:4085] 1 2 2 5 6 4 7 28 3 10 ...
## ..$ nmatch : int [1:4085] 54 49 49 49 54 54 29 49 29 54 ...
## ..$ nmismatch: int [1:4085] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4085] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4085] 2 2 2 2 2 2 2 1 2 2 ...
## ..$ accept : logi [1:4085] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ V4_2022_R1_filtered_2022.fastq.gz :'data.frame': 6170 obs. of 9 variables:
## ..$ sequence : chr [1:6170] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAACTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:6170] 1490 1483 1324 1089 1031 1030 683 643 570 567 ...
## ..$ forward : int [1:6170] 2 1 4 5 7 3 8 6 9 10 ...
## ..$ reverse : int [1:6170] 1 2 2 3 3 4 1 7 1 6 ...
## ..$ nmatch : int [1:6170] 29 29 29 49 49 29 29 29 29 29 ...
## ..$ nmismatch: int [1:6170] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6170] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6170] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6170] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B7_2022_R1_filtered_2022.fastq.gz :'data.frame': 3462 obs. of 9 variables:
## ..$ sequence : chr [1:3462] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTTACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:3462] 858 724 659 357 329 282 259 228 215 174 ...
## ..$ forward : int [1:3462] 1 2 3 5 6 4 9 15 10 14 ...
## ..$ reverse : int [1:3462] 2 1 1 4 3 5 8 17 9 7 ...
## ..$ nmatch : int [1:3462] 54 49 49 29 29 29 29 49 29 54 ...
## ..$ nmismatch: int [1:3462] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3462] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3462] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:3462] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G5_2022_R1_filtered_2022.fastq.gz :'data.frame': 4966 obs. of 9 variables:
## ..$ sequence : chr [1:4966] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
## ..$ abundance: int [1:4966] 4058 3218 1626 513 482 360 355 335 312 278 ...
## ..$ forward : int [1:4966] 1 2 3 5 6 7 11 9 4 12 ...
## ..$ reverse : int [1:4966] 1 1 2 7 3 5 9 8 6 10 ...
## ..$ nmatch : int [1:4966] 49 49 54 29 29 29 54 29 29 29 ...
## ..$ nmismatch: int [1:4966] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4966] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4966] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:4966] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G13_2022_R1_filtered_2022.fastq.gz:'data.frame': 7501 obs. of 9 variables:
## ..$ sequence : chr [1:7501] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACATCCCGACGTGTCG"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACACTCCTATGTATAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:7501] 4336 2491 1867 1750 1604 1377 1158 1154 1137 1027 ...
## ..$ forward : int [1:7501] 1 3 8 5 6 2 12 2 4 13 ...
## ..$ reverse : int [1:7501] 2 3 3 1 1 4 10 6 5 9 ...
## ..$ nmatch : int [1:7501] 29 34 34 29 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:7501] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:7501] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:7501] 2 2 2 2 2 1 2 1 2 2 ...
## ..$ accept : logi [1:7501] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G21_2022_R1_filtered_2022.fastq.gz:'data.frame': 6355 obs. of 9 variables:
## ..$ sequence : chr [1:6355] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGGTCGCTAATAA"| __truncated__ "GAGGAATATTGGACAATGGGTGAGAGCCTGATCCAGCCATCCCGCGTGAAGGACGACGGCCCTATGGGTTGTAAACTTCTTTTGTATAGGGATAAACCTACTCTCGTGAGA"| __truncated__ ...
## ..$ abundance: int [1:6355] 5440 2601 2554 2229 1974 1491 1434 1409 1263 1211 ...
## ..$ forward : int [1:6355] 1 3 4 5 6 7 2 9 11 2 ...
## ..$ reverse : int [1:6355] 1 1 1 2 1 4 3 1 1 5 ...
## ..$ nmatch : int [1:6355] 29 29 29 34 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:6355] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6355] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6355] 2 2 2 2 2 2 1 2 2 1 ...
## ..$ accept : logi [1:6355] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G29_2022_R1_filtered_2022.fastq.gz:'data.frame': 5216 obs. of 9 variables:
## ..$ sequence : chr [1:5216] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:5216] 4003 2658 1509 1201 1091 900 824 786 781 771 ...
## ..$ forward : int [1:5216] 1 2 5 6 2 10 4 13 7 3 ...
## ..$ reverse : int [1:5216] 1 2 4 4 6 3 7 3 5 6 ...
## ..$ nmatch : int [1:5216] 29 29 49 49 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:5216] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5216] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5216] 1 1 2 2 1 2 1 2 2 2 ...
## ..$ accept : logi [1:5216] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ M2_2022_R1_filtered_2022.fastq.gz :'data.frame': 36 obs. of 9 variables:
## ..$ sequence : chr [1:36] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:36] 375 95 86 79 57 52 43 34 30 28 ...
## ..$ forward : int [1:36] 1 2 1 2 1 7 2 10 7 3 ...
## ..$ reverse : int [1:36] 1 2 2 1 11 2 11 3 1 4 ...
## ..$ nmatch : int [1:36] 29 29 29 29 29 29 29 29 29 54 ...
## ..$ nmismatch: int [1:36] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:36] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:36] 1 1 1 2 1 2 1 2 2 1 ...
## ..$ accept : logi [1:36] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ V5_2022_R1_filtered_2022.fastq.gz :'data.frame': 5711 obs. of 9 variables:
## ..$ sequence : chr [1:5711] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
## ..$ abundance: int [1:5711] 10284 1388 1370 995 714 645 644 547 459 424 ...
## ..$ forward : int [1:5711] 1 4 3 5 7 2 2 8 1 9 ...
## ..$ reverse : int [1:5711] 1 3 3 5 2 2 7 6 10 2 ...
## ..$ nmatch : int [1:5711] 54 49 49 29 29 29 29 29 54 29 ...
## ..$ nmismatch: int [1:5711] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5711] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5711] 2 2 2 2 2 2 1 2 1 2 ...
## ..$ accept : logi [1:5711] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B8_2022_R1_filtered_2022.fastq.gz :'data.frame': 2929 obs. of 9 variables:
## ..$ sequence : chr [1:2929] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:2929] 1026 729 640 512 467 456 281 276 255 171 ...
## ..$ forward : int [1:2929] 1 3 2 6 5 4 8 10 9 14 ...
## ..$ reverse : int [1:2929] 2 1 4 3 5 3 1 6 9 17 ...
## ..$ nmatch : int [1:2929] 54 29 29 49 54 49 29 49 53 49 ...
## ..$ nmismatch: int [1:2929] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:2929] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:2929] 2 2 2 2 2 2 2 2 2 1 ...
## ..$ accept : logi [1:2929] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G6_2022_R1_filtered_2022.fastq.gz :'data.frame': 5103 obs. of 9 variables:
## ..$ sequence : chr [1:5103] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5103] 1538 974 782 735 494 457 318 298 280 273 ...
## ..$ forward : int [1:5103] 1 3 5 2 4 6 7 4 13 9 ...
## ..$ reverse : int [1:5103] 1 2 2 3 5 3 4 8 9 6 ...
## ..$ nmatch : int [1:5103] 54 49 49 29 29 29 29 29 51 54 ...
## ..$ nmismatch: int [1:5103] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5103] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5103] 2 2 2 2 1 2 2 1 2 2 ...
## ..$ accept : logi [1:5103] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G14_2022_R1_filtered_2022.fastq.gz:'data.frame': 5060 obs. of 9 variables:
## ..$ sequence : chr [1:5060] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGATAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5060] 4149 2483 2182 1039 1022 878 670 660 489 422 ...
## ..$ forward : int [1:5060] 1 2 3 4 8 9 6 5 2 5 ...
## ..$ reverse : int [1:5060] 1 1 2 1 2 4 7 6 5 1 ...
## ..$ nmatch : int [1:5060] 29 29 49 29 49 29 29 29 29 29 ...
## ..$ nmismatch: int [1:5060] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5060] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5060] 2 2 2 2 2 2 1 1 1 2 ...
## ..$ accept : logi [1:5060] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G22_2022_R1_filtered_2022.fastq.gz:'data.frame': 6064 obs. of 9 variables:
## ..$ sequence : chr [1:6064] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGGTCGCTAATAA"| __truncated__ ...
## ..$ abundance: int [1:6064] 3352 2656 1589 1479 1286 1254 1213 1200 1108 1034 ...
## ..$ forward : int [1:6064] 1 2 4 7 6 1 5 8 10 9 ...
## ..$ reverse : int [1:6064] 3 2 1 1 4 2 6 7 1 4 ...
## ..$ nmatch : int [1:6064] 29 29 29 29 34 29 34 34 29 34 ...
## ..$ nmismatch: int [1:6064] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6064] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6064] 1 2 2 2 2 1 2 2 2 2 ...
## ..$ accept : logi [1:6064] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G30_2022_R1_filtered_2022.fastq.gz:'data.frame': 3704 obs. of 9 variables:
## ..$ sequence : chr [1:3704] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
## ..$ abundance: int [1:3704] 3368 1298 1263 1004 958 951 895 850 809 746 ...
## ..$ forward : int [1:3704] 1 4 3 7 5 6 2 10 2 9 ...
## ..$ reverse : int [1:3704] 1 2 3 8 2 5 4 6 1 6 ...
## ..$ nmatch : int [1:3704] 29 29 29 54 29 29 29 49 29 49 ...
## ..$ nmismatch: int [1:3704] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3704] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3704] 2 2 2 2 2 2 1 2 2 2 ...
## ..$ accept : logi [1:3704] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ M3_2022_R1_filtered_2022.fastq.gz :'data.frame': 6507 obs. of 9 variables:
## ..$ sequence : chr [1:6507] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:6507] 2302 1632 1506 1392 1130 842 831 711 642 556 ...
## ..$ forward : int [1:6507] 1 3 4 5 7 2 8 6 2 10 ...
## ..$ reverse : int [1:6507] 2 1 3 3 6 4 1 7 5 9 ...
## ..$ nmatch : int [1:6507] 29 29 49 49 54 29 29 29 29 29 ...
## ..$ nmismatch: int [1:6507] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6507] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6507] 1 2 2 2 2 1 2 1 1 2 ...
## ..$ accept : logi [1:6507] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ S1_2022_R1_filtered_2022.fastq.gz :'data.frame': 5811 obs. of 9 variables:
## ..$ sequence : chr [1:5811] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
## ..$ abundance: int [1:5811] 830 730 608 363 264 257 245 218 186 173 ...
## ..$ forward : int [1:5811] 2 1 3 5 12 11 6 9 13 16 ...
## ..$ reverse : int [1:5811] 1 1 2 3 27 7 2 3 4 5 ...
## ..$ nmatch : int [1:5811] 49 49 29 54 48 49 29 54 37 54 ...
## ..$ nmismatch: int [1:5811] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5811] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5811] 2 2 2 2 1 2 2 2 2 2 ...
## ..$ accept : logi [1:5811] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B1_2022_R1_filtered_2022.fastq.gz :'data.frame': 4188 obs. of 9 variables:
## ..$ sequence : chr [1:4188] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:4188] 3221 1425 1224 735 643 599 599 589 473 468 ...
## ..$ forward : int [1:4188] 1 2 4 3 8 6 5 7 11 3 ...
## ..$ reverse : int [1:4188] 1 3 2 1 6 2 5 6 2 4 ...
## ..$ nmatch : int [1:4188] 29 29 29 29 49 29 29 49 29 29 ...
## ..$ nmismatch: int [1:4188] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4188] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4188] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:4188] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B9_2022_R1_filtered_2022.fastq.gz :'data.frame': 4367 obs. of 9 variables:
## ..$ sequence : chr [1:4367] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:4367] 1322 1248 1080 602 385 346 315 297 291 246 ...
## ..$ forward : int [1:4367] 2 1 3 5 7 8 1 4 1 15 ...
## ..$ reverse : int [1:4367] 1 2 3 4 4 10 9 8 5 11 ...
## ..$ nmatch : int [1:4367] 29 29 54 49 49 49 29 29 29 51 ...
## ..$ nmismatch: int [1:4367] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:4367] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:4367] 2 1 2 2 2 2 1 1 1 2 ...
## ..$ accept : logi [1:4367] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G7_2022_R1_filtered_2022.fastq.gz :'data.frame': 5039 obs. of 9 variables:
## ..$ sequence : chr [1:5039] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:5039] 1900 875 688 388 382 378 332 279 263 234 ...
## ..$ forward : int [1:5039] 1 2 3 6 5 4 7 11 10 12 ...
## ..$ reverse : int [1:5039] 1 2 2 8 3 4 5 10 6 12 ...
## ..$ nmatch : int [1:5039] 54 49 49 49 29 29 29 54 54 29 ...
## ..$ nmismatch: int [1:5039] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5039] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5039] 2 2 2 1 2 1 2 2 2 2 ...
## ..$ accept : logi [1:5039] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G15_2022_R1_filtered_2022.fastq.gz:'data.frame': 6111 obs. of 9 variables:
## ..$ sequence : chr [1:6111] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGTAGGGAGGAAAGGGTGTAACTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
## ..$ abundance: int [1:6111] 4167 3071 1463 1314 1020 980 845 727 705 542 ...
## ..$ forward : int [1:6111] 1 2 3 5 6 8 7 4 1 12 ...
## ..$ reverse : int [1:6111] 1 4 3 2 3 2 5 7 8 9 ...
## ..$ nmatch : int [1:6111] 29 54 29 29 29 29 29 29 29 49 ...
## ..$ nmismatch: int [1:6111] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6111] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6111] 2 2 2 2 2 2 1 1 1 2 ...
## ..$ accept : logi [1:6111] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G23_2022_R1_filtered_2022.fastq.gz:'data.frame': 3798 obs. of 9 variables:
## ..$ sequence : chr [1:3798] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
## ..$ abundance: int [1:3798] 4981 775 769 623 587 557 418 410 375 350 ...
## ..$ forward : int [1:3798] 1 2 4 5 6 3 9 8 7 11 ...
## ..$ reverse : int [1:3798] 1 2 3 2 3 4 6 2 5 2 ...
## ..$ nmatch : int [1:3798] 54 29 49 29 49 29 29 29 29 29 ...
## ..$ nmismatch: int [1:3798] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3798] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3798] 2 2 2 2 2 1 2 2 2 2 ...
## ..$ accept : logi [1:3798] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G31_2022_R1_filtered_2022.fastq.gz:'data.frame': 5562 obs. of 9 variables:
## ..$ sequence : chr [1:5562] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5562] 4691 2408 2362 2322 1410 1051 870 849 839 793 ...
## ..$ forward : int [1:5562] 1 3 2 1 1 8 10 2 1 4 ...
## ..$ reverse : int [1:5562] 1 3 2 4 2 6 3 1 5 5 ...
## ..$ nmatch : int [1:5562] 29 29 29 29 29 34 29 29 29 29 ...
## ..$ nmismatch: int [1:5562] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5562] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5562] 1 2 2 1 1 2 2 2 1 2 ...
## ..$ accept : logi [1:5562] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ M4_2022_R1_filtered_2022.fastq.gz :'data.frame': 5937 obs. of 9 variables:
## ..$ sequence : chr [1:5937] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "AGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCCGAGATGATAATGACAGTATCGGGAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5937] 7270 2414 1867 1525 1442 1294 1208 802 796 686 ...
## ..$ forward : int [1:5937] 1 3 4 6 5 2 7 2 8 9 ...
## ..$ reverse : int [1:5937] 1 1 2 1 4 3 1 1 1 1 ...
## ..$ nmatch : int [1:5937] 29 29 54 29 54 29 29 29 29 29 ...
## ..$ nmismatch: int [1:5937] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5937] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5937] 2 2 2 2 2 1 2 2 2 2 ...
## ..$ accept : logi [1:5937] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ S2_2022_R1_filtered_2022.fastq.gz :'data.frame': 1878 obs. of 9 variables:
## ..$ sequence : chr [1:1878] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
## ..$ abundance: int [1:1878] 427 341 341 228 155 149 144 126 103 94 ...
## ..$ forward : int [1:1878] 1 2 13 3 4 5 8 42 220 6 ...
## ..$ reverse : int [1:1878] 1 2 1 3 2 4 7 610 3 11 ...
## ..$ nmatch : int [1:1878] 49 29 49 54 29 54 49 54 54 37 ...
## ..$ nmismatch: int [1:1878] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:1878] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:1878] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:1878] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B2_2022_R1_filtered_2022.fastq.gz :'data.frame': 3835 obs. of 9 variables:
## ..$ sequence : chr [1:3835] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:3835] 1442 985 685 512 507 497 490 437 408 286 ...
## ..$ forward : int [1:3835] 2 1 3 7 4 5 6 11 10 15 ...
## ..$ reverse : int [1:3835] 1 2 4 3 1 3 5 7 6 1 ...
## ..$ nmatch : int [1:3835] 29 29 54 49 29 49 54 49 29 29 ...
## ..$ nmismatch: int [1:3835] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3835] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3835] 2 1 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:3835] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B10_2022_R1_filtered_2022.fastq.gz:'data.frame': 3650 obs. of 9 variables:
## ..$ sequence : chr [1:3650] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGATAATAC"| __truncated__ ...
## ..$ abundance: int [1:3650] 3851 3030 2261 1054 949 693 655 606 415 391 ...
## ..$ forward : int [1:3650] 1 2 3 4 6 7 5 10 9 49 ...
## ..$ reverse : int [1:3650] 1 2 2 1 3 4 1 7 6 2 ...
## ..$ nmatch : int [1:3650] 29 49 49 29 29 29 29 49 29 49 ...
## ..$ nmismatch: int [1:3650] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3650] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3650] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:3650] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G8_2022_R1_filtered_2022.fastq.gz :'data.frame': 3069 obs. of 9 variables:
## ..$ sequence : chr [1:3069] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:3069] 1055 1029 1014 947 609 491 435 381 361 331 ...
## ..$ forward : int [1:3069] 2 1 3 4 6 5 8 10 11 9 ...
## ..$ reverse : int [1:3069] 4 2 3 3 1 2 1 1 1 6 ...
## ..$ nmatch : int [1:3069] 29 29 49 49 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:3069] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3069] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3069] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:3069] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G16_2022_R1_filtered_2022.fastq.gz:'data.frame': 5764 obs. of 9 variables:
## ..$ sequence : chr [1:5764] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GAGGAATATTGGACAATGGGTGAGAGCCTGATCCAGCCATCCCGCGTGAAGGACTAAGGCCCTATGGGTTGTAAACTTCTTTTATACTGGGATAAACCTACTTACGTGTAA"| __truncated__ ...
## ..$ abundance: int [1:5764] 4317 2800 1592 1329 1324 1254 1186 1096 944 932 ...
## ..$ forward : int [1:5764] 2 3 1 8 6 11 5 12 1 1 ...
## ..$ reverse : int [1:5764] 1 1 2 6 1 1 7 1 4 9 ...
## ..$ nmatch : int [1:5764] 29 29 29 34 29 29 34 29 29 29 ...
## ..$ nmismatch: int [1:5764] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5764] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5764] 2 2 1 2 2 2 1 2 1 1 ...
## ..$ accept : logi [1:5764] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G24_2022_R1_filtered_2022.fastq.gz:'data.frame': 6675 obs. of 9 variables:
## ..$ sequence : chr [1:6675] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGTAGGGAGGAAAGGGTGTAACTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
## ..$ abundance: int [1:6675] 4489 3076 1285 1218 1108 1053 1032 849 772 765 ...
## ..$ forward : int [1:6675] 1 2 3 7 4 8 6 9 5 10 ...
## ..$ reverse : int [1:6675] 1 3 2 6 5 8 2 4 7 4 ...
## ..$ nmatch : int [1:6675] 29 29 29 54 29 49 29 49 29 49 ...
## ..$ nmismatch: int [1:6675] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6675] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6675] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6675] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G32_2022_R1_filtered_2022.fastq.gz:'data.frame': 1430 obs. of 9 variables:
## ..$ sequence : chr [1:1430] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:1430] 1102 769 601 355 349 328 237 175 150 148 ...
## ..$ forward : int [1:1430] 1 2 4 9 5 3 6 12 11 8 ...
## ..$ reverse : int [1:1430] 1 1 5 8 3 2 2 6 2 4 ...
## ..$ nmatch : int [1:1430] 49 49 54 49 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:1430] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:1430] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:1430] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:1430] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ M5_2022_R1_filtered_2022.fastq.gz :'data.frame': 8678 obs. of 9 variables:
## ..$ sequence : chr [1:8678] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ ...
## ..$ abundance: int [1:8678] 3929 3927 3927 3264 3188 1449 1172 887 883 794 ...
## ..$ forward : int [1:8678] 4 3 5 1 2 6 1 8 1 9 ...
## ..$ reverse : int [1:8678] 2 1 2 3 1 4 1 5 6 5 ...
## ..$ nmatch : int [1:8678] 49 29 49 29 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:8678] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:8678] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:8678] 2 2 2 1 2 2 2 2 1 2 ...
## ..$ accept : logi [1:8678] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ S3_2022_R1_filtered_2022.fastq.gz :'data.frame': 3784 obs. of 9 variables:
## ..$ sequence : chr [1:3784] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
## ..$ abundance: int [1:3784] 487 472 377 286 239 154 150 145 137 137 ...
## ..$ forward : int [1:3784] 1 3 2 4 7 14 8 15 16 11 ...
## ..$ reverse : int [1:3784] 1 1 2 3 3 4 2 19 8 6 ...
## ..$ nmatch : int [1:3784] 49 49 29 54 54 51 29 49 29 54 ...
## ..$ nmismatch: int [1:3784] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:3784] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:3784] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:3784] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ B3_2022_R1_filtered_2022.fastq.gz :'data.frame': 5754 obs. of 9 variables:
## ..$ sequence : chr [1:5754] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
## ..$ abundance: int [1:5754] 731 651 615 427 420 373 324 298 273 262 ...
## ..$ forward : int [1:5754] 2 3 1 5 4 9 7 6 10 13 ...
## ..$ reverse : int [1:5754] 1 1 3 5 7 4 2 2 9 6 ...
## ..$ nmatch : int [1:5754] 49 49 29 54 54 51 29 29 54 54 ...
## ..$ nmismatch: int [1:5754] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5754] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5754] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:5754] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G1_2022_R1_filtered_2022.fastq.gz :'data.frame': 6588 obs. of 9 variables:
## ..$ sequence : chr [1:6588] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:6588] 5001 1831 1472 1400 1055 1015 953 953 888 780 ...
## ..$ forward : int [1:6588] 1 2 5 6 9 10 3 8 11 7 ...
## ..$ reverse : int [1:6588] 1 1 6 3 1 3 5 2 1 7 ...
## ..$ nmatch : int [1:6588] 29 29 54 49 29 49 29 29 29 29 ...
## ..$ nmismatch: int [1:6588] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6588] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6588] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6588] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G9_2022_R1_filtered_2022.fastq.gz :'data.frame': 5401 obs. of 9 variables:
## ..$ sequence : chr [1:5401] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:5401] 11407 2544 1160 986 945 569 517 505 500 471 ...
## ..$ forward : int [1:5401] 1 2 5 6 4 13 1 10 3 11 ...
## ..$ reverse : int [1:5401] 1 2 3 3 4 9 10 3 5 3 ...
## ..$ nmatch : int [1:5401] 54 29 29 29 29 49 54 29 29 29 ...
## ..$ nmismatch: int [1:5401] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5401] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5401] 1 2 2 2 1 2 1 2 1 2 ...
## ..$ accept : logi [1:5401] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G17_2022_R1_filtered_2022.fastq.gz:'data.frame': 6930 obs. of 9 variables:
## ..$ sequence : chr [1:6930] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAACTTAATAC"| __truncated__ ...
## ..$ abundance: int [1:6930] 6375 2959 1840 1264 1216 1042 893 865 795 707 ...
## ..$ forward : int [1:6930] 1 2 4 5 3 6 10 7 11 8 ...
## ..$ reverse : int [1:6930] 2 1 1 3 3 6 1 5 1 3 ...
## ..$ nmatch : int [1:6930] 54 29 29 29 29 29 29 29 29 29 ...
## ..$ nmismatch: int [1:6930] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6930] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6930] 2 2 2 2 2 1 2 2 2 2 ...
## ..$ accept : logi [1:6930] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G25_2022_R1_filtered_2022.fastq.gz:'data.frame': 7429 obs. of 9 variables:
## ..$ sequence : chr [1:7429] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:7429] 19022 3456 3078 3048 2755 2098 1871 1489 1452 1050 ...
## ..$ forward : int [1:7429] 1 2 1 3 2 4 1 3 2 6 ...
## ..$ reverse : int [1:7429] 1 2 2 2 1 3 4 1 6 7 ...
## ..$ nmatch : int [1:7429] 29 29 29 29 29 54 29 29 29 34 ...
## ..$ nmismatch: int [1:7429] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:7429] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:7429] 2 2 1 2 2 2 1 2 1 2 ...
## ..$ accept : logi [1:7429] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ G33_2022_R1_filtered_2022.fastq.gz:'data.frame': 6853 obs. of 9 variables:
## ..$ sequence : chr [1:6853] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
## ..$ abundance: int [1:6853] 1539 1375 1125 567 497 442 440 436 353 341 ...
## ..$ forward : int [1:6853] 1 2 3 4 6 9 7 5 8 10 ...
## ..$ reverse : int [1:6853] 1 1 3 4 2 8 7 5 6 12 ...
## ..$ nmatch : int [1:6853] 49 49 54 29 29 49 29 29 29 54 ...
## ..$ nmismatch: int [1:6853] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:6853] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:6853] 2 2 2 2 2 2 2 2 2 2 ...
## ..$ accept : logi [1:6853] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ V1_2022_R1_filtered_2022.fastq.gz :'data.frame': 5647 obs. of 9 variables:
## ..$ sequence : chr [1:5647] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
## ..$ abundance: int [1:5647] 1970 937 770 744 693 665 593 490 453 421 ...
## ..$ forward : int [1:5647] 1 4 5 3 7 6 2 8 11 9 ...
## ..$ reverse : int [1:5647] 2 3 1 5 3 1 4 7 11 1 ...
## ..$ nmatch : int [1:5647] 29 49 29 29 49 29 29 54 54 29 ...
## ..$ nmismatch: int [1:5647] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:5647] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:5647] 2 2 2 1 2 2 2 2 2 2 ...
## ..$ accept : logi [1:5647] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ S4_2022_R1_filtered_2022.fastq.gz :'data.frame': 2606 obs. of 9 variables:
## ..$ sequence : chr [1:2606] "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
## ..$ abundance: int [1:2606] 265 231 226 213 162 153 129 127 126 125 ...
## ..$ forward : int [1:2606] 1 2 3 234 11 6 19 5 18 86 ...
## ..$ reverse : int [1:2606] 2 1 3 3 24 1 18 7 4 2 ...
## ..$ nmatch : int [1:2606] 29 54 49 49 49 54 48 54 51 29 ...
## ..$ nmismatch: int [1:2606] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ nindel : int [1:2606] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ prefer : num [1:2606] 2 2 2 2 1 2 2 2 2 2 ...
## ..$ accept : logi [1:2606] TRUE TRUE TRUE TRUE TRUE TRUE ...
# Create the ASV count table from the merged read pairs.
# The recorded output below shows an integer matrix with 59 rows and
# 169619 columns (presumably samples x ASVs -- confirm).
raw_ASV_table2 <- makeSequenceTable(merged_ASVs2)
# Path under data/01_DADA2 intended for the raw ASV table.
# NOTE(review): this line only stores the path; no write call is visible in
# this section -- confirm the table is actually saved further down.
raw_ASV_table2_path <- "data/01_DADA2/raw_ASV_table2"
# Check the dimensions, class, and storage type of the table
dim(raw_ASV_table2)
## [1] 59 169619
class(raw_ASV_table2)
## [1] "matrix" "array"
typeof(raw_ASV_table2)
## [1] "integer"
# Tabulate ASV sequence lengths (characters per sequence) across the whole
# dataset, before any length filtering
table(nchar(getSequences(raw_ASV_table2)))
##
## 242 245 247 248 253 255 256 259 265 266 267 275 280
## 1 1 1 1 1 1 1 4 1 4 1 1 1
## 295 314 315 316 318 319 322 325 332 336 340 342 344
## 1 1 1 1 1 1 1 3 1 3 1 7 1
## 348 354 355 357 358 359 362 363 364 366 367 369 370
## 1 1 4 6 3 1 1 3 1 1 1 14 1
## 371 372 373 376 377 379 381 382 383 385 386 388 389
## 1 2 5 2 1 13 1 7 8 3 3 3 8
## 390 391 393 394 395 396 398 399 400 401 402 403 404
## 2 1 1 1 5 2 6 42 336 21390 4103 1477 1435
## 405 406 407 408 409 410 411 412 413 414 415 416 417
## 301 15705 672 612 399 492 82 345 127 241 490 274 135
## 418 419 420 421 422 423 424 425 426 427 428 429 430
## 918 869 744 6034 393 1571 1493 14909 77420 15592 742 34 15
## 431 432 433 434 435 436 437 438 442 443
## 1 3 5 1 3 19 3 12 3 22
# Plot the distribution of ASV sequence lengths in the raw table,
# BEFORE the length trim (this uses raw_ASV_table2, not the trimmed table).
# binwidth = 1 draws one bar per sequence length; the default of 30 bins
# lumps neighbouring lengths together and triggers the stat_bin() message.
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table2))) %>%
  ggplot(aes(x = Seq_Length)) +
  geom_histogram(binwidth = 1) +
  labs(title = "Raw distribution of ASV length")
# Keep only ASVs whose sequence length is between 350 and 450 characters;
# the few much shorter sequences seen in the length table are dropped.
# drop = FALSE keeps the result a matrix even if only one ASV passes the
# filter (otherwise the subset would collapse to a plain vector).
raw_ASV_table_trimmed2 <-
  raw_ASV_table2[, nchar(colnames(raw_ASV_table2)) %in% 350:450, drop = FALSE]
# Inspect the distribution of sequence lengths of the retained ASVs
table(nchar(getSequences(raw_ASV_table_trimmed2)))
##
## 354 355 357 358 359 362 363 364 366 367 369 370 371
## 1 4 6 3 1 1 3 1 1 1 14 1 1
## 372 373 376 377 379 381 382 383 385 386 388 389 390
## 2 5 2 1 13 1 7 8 3 3 3 8 2
## 391 393 394 395 396 398 399 400 401 402 403 404 405
## 1 1 1 5 2 6 42 336 21390 4103 1477 1435 301
## 406 407 408 409 410 411 412 413 414 415 416 417 418
## 15705 672 612 399 492 82 345 127 241 490 274 135 918
## 419 420 421 422 423 424 425 426 427 428 429 430 431
## 869 744 6034 393 1571 1493 14909 77420 15592 742 34 15 1
## 432 433 434 435 436 437 438 442 443
## 3 5 1 3 19 3 12 3 22
# What proportion of the total counts (summed over the whole table) is left
# after the length trim?
sum(raw_ASV_table_trimmed2) / sum(raw_ASV_table2)
## [1] 0.9999665
# Plot the distribution of ASV sequence lengths AFTER the length trim.
# binwidth = 1 draws one bar per sequence length; the default of 30 bins
# lumps neighbouring lengths together and triggers the stat_bin() message.
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed2))) %>%
  ggplot(aes(x = Seq_Length)) +
  geom_histogram(binwidth = 1) +
  labs(title = "Trimmed distribution of ASV length")
# Note: the dominant peak is at length 426 (77420 ASVs in the length table
# above), which dwarfs every other length, so zoom in on the y-axis.
# coord_cartesian() zooms without discarding data; setting limits on
# scale_y_continuous() removed every bar taller than 500 (the
# "Removed 9 rows" warning), leaving gaps exactly where the peaks are.
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed2))) %>%
  ggplot(aes(x = Seq_Length)) +
  geom_histogram(binwidth = 1) +
  labs(title = "Trimmed distribution of ASV length") +
  coord_cartesian(ylim = c(0, 500))
I will keep these ASVs and potentially get more stringent later if it seems logical to do so.
Sometimes chimeras arise in our workflow.
Chimeric sequences are artificial sequences formed by the combination of two or more distinct biological sequences. These chimeric sequences can arise during the polymerase chain reaction (PCR) amplification step of the 16S rRNA gene, where fragments from different templates can be erroneously joined together.
Chimera removal is an essential step in the analysis of 16S sequencing data to improve the accuracy of downstream analyses, such as taxonomic assignment and diversity assessment. It helps to avoid the inclusion of misleading or spurious sequences that could lead to incorrect biological interpretations.
# Remove the chimeras from the length-filtered ASV table
# Uses the "consensus" method; verbose=TRUE prints how many bimeras were
# identified out of the input sequences.
noChimeras_ASV_table2 <- removeBimeraDenovo(raw_ASV_table_trimmed2,
method="consensus",
multithread=TRUE, verbose=TRUE)
## Identified 113004 bimeras out of 169576 input sequences.
# Check the dimensions of the chimera-free table (rows, then columns; the
# columns are the ASVs that survived chimera removal)
dim(noChimeras_ASV_table2)
## [1] 59 56572
# What proportion of the total counts is left after chimera removal,
# relative to the length-filtered table?
sum(noChimeras_ASV_table2)/sum(raw_ASV_table_trimmed2)
## [1] 0.7083524
# ...and relative to the full table before the length filter?
sum(noChimeras_ASV_table2)/sum(raw_ASV_table2)
## [1] 0.7083287
# Histogram of ASV sequence lengths after both the length filter and
# chimera removal
ggplot(
  data = data.frame(
    Seq_Length_NoChim2 = nchar(getSequences(noChimeras_ASV_table2))
  ),
  mapping = aes(x = Seq_Length_NoChim2)
) +
  geom_histogram() +
  labs(title = "Trimmed + Chimera Removal distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
We retained about 71% of the reads after chimera removal, even though 113,004 of the 169,576 ASVs were flagged as bimeras.
Here, we will look at the number of reads that were lost in the filtering, denoising, merging, and chimera removal.
# Helper: total count for one object, computed as the sum of the values
# returned by getUniques()
getN2 <- function(x) {
  sum(getUniques(x))
}
# Assemble the read-tracking table: the columns of filtered_reads2 followed by
# one (still unnamed) column each for the denoised forward reads, the denoised
# reverse reads, the merged reads and the per-row totals of the chimera-free
# ASV table
track2 <- cbind(
  filtered_reads2,
  sapply(dada_forward2, FUN = getN2),
  sapply(dada_reverse2, FUN = getN2),
  sapply(merged_ASVs2, FUN = getN2),
  rowSums(noChimeras_ASV_table2)
)
head(track2)
## reads.in
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 204642
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 175656
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 138589
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 202865
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 202611
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 188583
## reads.out
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 112436
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 109166
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 77962
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 119856
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 123024
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 117249
##
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 101452
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 102127
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 71014
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 107161
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 109470
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 107939
##
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 102760
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 103285
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 71718
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 109968
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 112127
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 109250
##
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 49650
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 70513
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 39986
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 56269
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 54587
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 66448
##
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz 40089
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz 48534
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 29166
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 43634
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 43943
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 51221
# Give the tracking table informative dimension names in one step:
# rows = sample names, columns = the six pipeline stages
# (most column names are missing at the moment!)
dimnames(track2) <- list(
  samples2,
  c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")
)
# Data-frame version used to track the reads through our DADA2 pipeline:
# the row names move into a "names" column and we add the percentage of
# input reads that survive to the chimera-free table
track_read_counts2 <- track2 |>
  as.data.frame() |>
  rownames_to_column(var = "names") |>
  mutate(perc_reads_retained = 100 * nochim / input)
# Interactive table of the per-sample read counts
DT::datatable(track_read_counts2)
# Plot the read counts per pipeline step: one grey line per sample, one
# coloured point per sample and step, with the steps ordered as they occur
# in the workflow
track_read_counts2 |>
  pivot_longer(
    cols = input:nochim,
    names_to = "read_type",
    values_to = "num_reads"
  ) |>
  mutate(
    read_type = fct_relevel(
      read_type,
      "input", "filtered", "denoisedF", "denoisedR", "merged", "nochim"
    )
  ) |>
  ggplot(aes(x = read_type, y = num_reads, fill = read_type)) +
  geom_line(aes(group = names), color = "grey") +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  scale_fill_brewer(palette = "Spectral") +
  labs(x = "Filtering Step", y = "Number of Sequences") +
  theme_bw()
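There is quite a drastic drop from the raw input reads to the final filtered, merged, chimera-free reads (roughly 20-28% of the input reads are retained in the six samples shown above, with the largest losses at the quality-filtering and merging steps), but it seems like the sequencing depth was enough that this will still be acceptable.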
Here, we will use the SILVA database, version 138.1!
# Classify the ASVs against a reference set using the RDP Naive Bayesian Classifier described by Wang et al., (2007) in AEM
# Reference set: SILVA nr99 v138.1 training fasta (absolute path; adjust it
# if the reference files live somewhere else on your machine)
taxa_train2 <-
assignTaxonomy(noChimeras_ASV_table2,
"/workdir/in_class_data/taxonomy/silva_nr99_v138.1_train_set.fa.gz",
multithread=TRUE)
# Add the genus/species information
# Uses the matching SILVA v138.1 species-assignment fasta
taxa_addSpecies2 <-
addSpecies(taxa_train2,
"/workdir/in_class_data/taxonomy/silva_species_assignment_v138.1.fa.gz")
# Inspect the taxonomy
# taxa_print2 is a copy made for viewing only; taxa_addSpecies2 keeps its
# sequence row names, which the tax-table preparation below relies on
taxa_print2 <- taxa_addSpecies2 # Removing sequence rownames for display only
rownames(taxa_print2) <- NULL
#View(taxa_print2)
Below, we will prepare the following:
ASV_fastas: A fasta file that we can use to build a
tree for phylogenetic analyses (e.g. phylogenetic alpha diversity
metrics or UniFrac dissimilarity).
########### 2. COUNT TABLE ###############
############## Modify the ASV names and then save a fasta file! ##############
# Give headers more manageable names
# First pull the ASV sequences, which are the column names of the
# chimera-free ASV table
asv_seqs2 <- colnames(noChimeras_ASV_table2)
# Peek at the first five sequences
asv_seqs2[1:5]
## [1] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA"
## [2] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA"
## [3] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA"
## [4] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA"
## [5] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA"
# Pre-allocate an empty character vector with one slot per ASV (column of the
# chimera-free table); it will hold the fasta headers, which are also our
# ASV names
asv_headers2 <- character(ncol(noChimeras_ASV_table2))
asv_headers2[1:5]
## [1] "" "" "" "" ""
# Build the fasta header for every ASV (">ASV_1", ">ASV_2", ...) in a single
# vectorised call instead of a loop. seq_len() is used rather than 1:n so
# that a table with zero ASVs yields zero headers instead of iterating over
# c(1, 0) and inventing a bogus ">ASV_1" entry.
asv_headers2 <- paste0(">ASV_", seq_len(ncol(noChimeras_ASV_table2)))
# intuition check: the first five headers should read ">ASV_1" ... ">ASV_5"
asv_headers2[1:5]
## [1] ">ASV_1" ">ASV_2" ">ASV_3" ">ASV_4" ">ASV_5"
##### Rename ASVs in table then write out our ASV fasta file!
#View(noChimeras_ASV_table2)
# Transpose the chimera-free table so that the ASVs become the rows
asv_tab2 <- t(noChimeras_ASV_table2)
#View(asv_tab2)
## Rename our asvs: reuse the fasta headers, minus the leading ">"
rownames(asv_tab2) <- sub(">", "", asv_headers2, fixed = TRUE)
#View(asv_tab2)
# Inspect the taxonomy table
#View(taxa_addSpecies2)
##### Prepare tax table
# Convert the taxonomy matrix to a data frame and move the ASV sequences
# (currently the row names) into their own "ASVseqs" column
new_tax_tab2 <- taxa_addSpecies2 |>
  as.data.frame() |>
  rownames_to_column(var = "ASVseqs")
head(new_tax_tab2)
## ASVseqs
## 1 GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## 2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## 3 GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 4 GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## Kingdom Phylum Class Order
## 1 Bacteria Firmicutes Clostridia Clostridiales
## 2 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## 3 Bacteria Actinobacteriota Actinobacteria Micrococcales
## 4 Bacteria Actinobacteriota Actinobacteria Micrococcales
## 5 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## 6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species
## 1 Clostridiaceae Clostridium sensu stricto 13 <NA>
## 2 Aeromonadaceae Aeromonas <NA>
## 3 Micrococcaceae Pseudarthrobacter <NA>
## 4 Micrococcaceae Pseudarthrobacter <NA>
## 5 Pseudomonadaceae Pseudomonas <NA>
## 6 Pseudomonadaceae Pseudomonas <NA>
# intuition check: the taxonomy rows must be in exactly the same order as the
# ASV columns of the count table. The length is checked first because `==`
# silently recycles the shorter vector, which could hide a mismatch.
stopifnot(
  length(new_tax_tab2$ASVseqs) == ncol(noChimeras_ASV_table2),
  new_tax_tab2$ASVseqs == colnames(noChimeras_ASV_table2)
)
# Now let's add the ASV names (ASV_1, ASV_2, ...) as row names
rownames(new_tax_tab2) <- rownames(asv_tab2)
head(new_tax_tab2)
## ASVseqs
## ASV_1 GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## ASV_2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_3 GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_4 GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## Kingdom Phylum Class Order
## ASV_1 Bacteria Firmicutes Clostridia Clostridiales
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV_3 Bacteria Actinobacteriota Actinobacteria Micrococcales
## ASV_4 Bacteria Actinobacteriota Actinobacteria Micrococcales
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species
## ASV_1 Clostridiaceae Clostridium sensu stricto 13 <NA>
## ASV_2 Aeromonadaceae Aeromonas <NA>
## ASV_3 Micrococcaceae Pseudarthrobacter <NA>
## ASV_4 Micrococcaceae Pseudarthrobacter <NA>
## ASV_5 Pseudomonadaceae Pseudomonas <NA>
## ASV_6 Pseudomonadaceae Pseudomonas <NA>
### Final prep of tax table. Add new column with ASV names
asv_tax2 <- new_tax_tab2 |>
  # ASV names taken from the count table's row names, for the phyloseq handoff
  mutate(ASV = rownames(asv_tab2)) |>
  # Put the taxonomic ranks first, then the ASV name and the full sequence
  dplyr::select(
    Kingdom, Phylum, Class, Order, Family, Genus, Species, ASV, ASVseqs
  )
head(asv_tax2)
## Kingdom Phylum Class Order
## ASV_1 Bacteria Firmicutes Clostridia Clostridiales
## ASV_2 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales
## ASV_3 Bacteria Actinobacteriota Actinobacteria Micrococcales
## ASV_4 Bacteria Actinobacteriota Actinobacteria Micrococcales
## ASV_5 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## ASV_6 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales
## Family Genus Species ASV
## ASV_1 Clostridiaceae Clostridium sensu stricto 13 <NA> ASV_1
## ASV_2 Aeromonadaceae Aeromonas <NA> ASV_2
## ASV_3 Micrococcaceae Pseudarthrobacter <NA> ASV_3
## ASV_4 Micrococcaceae Pseudarthrobacter <NA> ASV_4
## ASV_5 Pseudomonadaceae Pseudomonas <NA> ASV_5
## ASV_6 Pseudomonadaceae Pseudomonas <NA> ASV_6
## ASVseqs
## ASV_1 GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## ASV_2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_3 GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_4 GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
# Intuition check: the ASV column, the tax-table row names and the count-table
# row names must all agree. The row counts are compared first because `==`
# silently recycles the shorter vector, which could hide a mismatch.
stopifnot(
  nrow(asv_tax2) == nrow(asv_tab2),
  asv_tax2$ASV == rownames(asv_tax2),
  rownames(asv_tax2) == rownames(asv_tab2)
)
01_DADA2 files
Now, we will write the files! We will write the following to the
data/01_DADA2/ folder. We will save both as files that
could be submitted as supplements AND as .RData objects for easy loading
into the next steps in R:
ASV_counts.tsv: ASV count table whose ASV names have been
re-written as shortened headers like ASV_1, ASV_2, etc., which will
match the names in our fasta file below. This will also be saved as
data/01_DADA2/ASV_counts.RData.
ASV_counts_withSeqNames.tsv: This is generated from the
data object in this file known as noChimeras_ASV_table. ASV
headers include the entire ASV sequence (roughly 400-430 bp in this dataset). In addition,
we will save this as a .RData object as
data/01_DADA2/noChimeras_ASV_table.RData as we will use
this data in analysis/02_Taxonomic_Assignment.Rmd to assign
the taxonomy from the sequence headers.
ASVs.fasta: A fasta file output of the ASV names from
ASV_counts.tsv and the sequences from the ASVs in
ASV_counts_withSeqNames.tsv. A fasta file that we can use
to build a tree for phylogenetic analyses (e.g. phylogenetic alpha
diversity metrics or UniFrac dissimilarity).
ASVs.fasta in
data/02_TaxAss_FreshTrain/ to be used for the taxonomy
classification in the next step in the workflow.
track_read_counts.RData: To track how many reads we
lost throughout our workflow that could be used and plotted later. We
will add this to the metadata in
analysis/02_Taxonomic_Assignment.Rmd.
Note: in this run every object and output file name carries a "2" suffix (e.g. ASV_counts2.tsv, noChimeras_ASV_table2.RData).
# FIRST, we will save our output as regular files, which will be useful later on.
# Save to regular .tsv file
# Write BOTH the modified and unmodified ASV tables to a file!
# (col.names = NA together with the row names writes a blank header cell
# above the row-name column, so the header lines up with the data)
# Write count table with ASV numbered names (e.g. ASV_1, ASV_2, etc)
write.table(asv_tab2, "data/01_DADA2/ASV_counts2.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write count table with ASV sequence names
write.table(noChimeras_ASV_table2, "data/01_DADA2/ASV_counts_withSeqNames2.tsv", sep = "\t", quote = FALSE, col.names = NA)
# Write out the fasta file for reference later on for what seq matches what ASV
# rbind() stacks headers over sequences and c() reads the result column by
# column, which interleaves them: header 1, sequence 1, header 2, sequence 2, ...
asv_fasta2 <- c(rbind(asv_headers2, asv_seqs2))
# Save to a file!
# NOTE(review): the file is named "ASVs.fasta2" (suffix after the extension),
# unlike the other outputs ("ASV_counts2.tsv", ...) - confirm this is intended
# before downstream steps rely on the name
write(asv_fasta2, "data/01_DADA2/ASVs.fasta2")
# SECOND, let's save the taxonomy tables
# Write the table
write.table(asv_tax2, "data/01_DADA2/ASV_taxonomy2.tsv", sep = "\t", quote = FALSE, col.names = NA)
# THIRD, let's save to a RData object
# Each of these files will be used in the analysis/02_Taxonomic_Assignment
# RData objects are for easy loading :)
save(noChimeras_ASV_table2, file = "data/01_DADA2/noChimeras_ASV_table2.RData")
save(asv_tab2, file = "data/01_DADA2/ASV_counts2.RData")
# And save the track_counts_df a R object, which we will merge with metadata information in the next step of the analysis in analysis/02_Taxonomic_Assignment.
save(track_read_counts2, file = "data/01_DADA2/track_read_counts2.RData")
# Ensure reproducibility: record the R version, platform and package versions
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.3.2 (2023-10-31)
## os Rocky Linux 9.0 (Blue Onyx)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2024-05-02
## pandoc 3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date (UTC) lib source
## abind 1.4-5 2016-07-21 [2] CRAN (R 4.3.2)
## ade4 1.7-22 2023-02-06 [1] CRAN (R 4.3.2)
## ape 5.7-1 2023-03-13 [2] CRAN (R 4.3.2)
## Biobase 2.62.0 2023-10-24 [2] Bioconductor
## BiocGenerics 0.48.1 2023-11-01 [2] Bioconductor
## BiocManager * 1.30.22 2023-08-08 [2] CRAN (R 4.3.2)
## BiocParallel 1.36.0 2023-10-24 [2] Bioconductor
## biomformat 1.30.0 2023-10-24 [1] Bioconductor
## Biostrings 2.70.1 2023-10-25 [2] Bioconductor
## bitops 1.0-7 2021-04-24 [2] CRAN (R 4.3.2)
## bslib 0.5.1 2023-08-11 [2] CRAN (R 4.3.2)
## cachem 1.0.8 2023-05-01 [2] CRAN (R 4.3.2)
## callr 3.7.3 2022-11-02 [2] CRAN (R 4.3.2)
## cli 3.6.1 2023-03-23 [2] CRAN (R 4.3.2)
## cluster 2.1.4 2022-08-22 [2] CRAN (R 4.3.2)
## codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.2)
## colorspace 2.1-0 2023-01-23 [2] CRAN (R 4.3.2)
## crayon 1.5.2 2022-09-29 [2] CRAN (R 4.3.2)
## crosstalk 1.2.0 2021-11-04 [2] CRAN (R 4.3.2)
## dada2 * 1.30.0 2023-10-24 [1] Bioconductor
## data.table 1.14.8 2023-02-17 [2] CRAN (R 4.3.2)
## DelayedArray 0.28.0 2023-10-24 [2] Bioconductor
## deldir 1.0-9 2023-05-17 [2] CRAN (R 4.3.2)
## devtools * 2.4.5 2022-10-11 [1] CRAN (R 4.3.2)
## digest 0.6.33 2023-07-07 [2] CRAN (R 4.3.2)
## dplyr * 1.1.3 2023-09-03 [2] CRAN (R 4.3.2)
## DT * 0.32 2024-02-19 [1] CRAN (R 4.3.2)
## ellipsis 0.3.2 2021-04-29 [2] CRAN (R 4.3.2)
## evaluate 0.23 2023-11-01 [2] CRAN (R 4.3.2)
## fansi 1.0.5 2023-10-08 [2] CRAN (R 4.3.2)
## farver 2.1.1 2022-07-06 [2] CRAN (R 4.3.2)
## fastmap 1.1.1 2023-02-24 [2] CRAN (R 4.3.2)
## forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.2)
## foreach 1.5.2 2022-02-02 [2] CRAN (R 4.3.2)
## fs 1.6.3 2023-07-20 [2] CRAN (R 4.3.2)
## generics 0.1.3 2022-07-05 [2] CRAN (R 4.3.2)
## GenomeInfoDb 1.38.0 2023-10-24 [2] Bioconductor
## GenomeInfoDbData 1.2.11 2023-11-07 [2] Bioconductor
## GenomicAlignments 1.38.0 2023-10-24 [2] Bioconductor
## GenomicRanges 1.54.1 2023-10-29 [2] Bioconductor
## ggplot2 * 3.5.0 2024-02-23 [2] CRAN (R 4.3.2)
## glue 1.6.2 2022-02-24 [2] CRAN (R 4.3.2)
## gtable 0.3.4 2023-08-21 [2] CRAN (R 4.3.2)
## highr 0.10 2022-12-22 [2] CRAN (R 4.3.2)
## hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.2)
## htmltools 0.5.7 2023-11-03 [2] CRAN (R 4.3.2)
## htmlwidgets 1.6.2 2023-03-17 [2] CRAN (R 4.3.2)
## httpuv 1.6.12 2023-10-23 [2] CRAN (R 4.3.2)
## hwriter 1.3.2.1 2022-04-08 [1] CRAN (R 4.3.2)
## igraph 1.5.1 2023-08-10 [2] CRAN (R 4.3.2)
## iNEXT * 3.0.0 2022-08-29 [1] CRAN (R 4.3.2)
## interp 1.1-6 2024-01-26 [1] CRAN (R 4.3.2)
## IRanges 2.36.0 2023-10-24 [2] Bioconductor
## iterators 1.0.14 2022-02-05 [2] CRAN (R 4.3.2)
## jpeg 0.1-10 2022-11-29 [1] CRAN (R 4.3.2)
## jquerylib 0.1.4 2021-04-26 [2] CRAN (R 4.3.2)
## jsonlite 1.8.7 2023-06-29 [2] CRAN (R 4.3.2)
## knitr 1.45 2023-10-30 [2] CRAN (R 4.3.2)
## labeling 0.4.3 2023-08-29 [2] CRAN (R 4.3.2)
## later 1.3.1 2023-05-02 [2] CRAN (R 4.3.2)
## lattice * 0.21-9 2023-10-01 [2] CRAN (R 4.3.2)
## latticeExtra 0.6-30 2022-07-04 [1] CRAN (R 4.3.2)
## lifecycle 1.0.3 2022-10-07 [2] CRAN (R 4.3.2)
## lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.3.2)
## magrittr 2.0.3 2022-03-30 [2] CRAN (R 4.3.2)
## MASS 7.3-60 2023-05-04 [2] CRAN (R 4.3.2)
## Matrix 1.6-1.1 2023-09-18 [2] CRAN (R 4.3.2)
## MatrixGenerics 1.14.0 2023-10-24 [2] Bioconductor
## matrixStats 1.1.0 2023-11-07 [2] CRAN (R 4.3.2)
## memoise 2.0.1 2021-11-26 [2] CRAN (R 4.3.2)
## mgcv 1.9-0 2023-07-11 [2] CRAN (R 4.3.2)
## mime 0.12 2021-09-28 [2] CRAN (R 4.3.2)
## miniUI 0.1.1.1 2018-05-18 [2] CRAN (R 4.3.2)
## multtest 2.58.0 2023-10-24 [1] Bioconductor
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.3.2)
## nlme 3.1-163 2023-08-09 [2] CRAN (R 4.3.2)
## pacman * 0.5.1 2019-03-11 [1] CRAN (R 4.3.2)
## patchwork * 1.2.0.9000 2024-03-13 [1] Github (thomasp85/patchwork@d943757)
## permute * 0.9-7 2022-01-27 [1] CRAN (R 4.3.2)
## phyloseq * 1.46.0 2023-10-24 [1] Bioconductor
## pillar 1.9.0 2023-03-22 [2] CRAN (R 4.3.2)
## pkgbuild 1.4.2 2023-06-26 [2] CRAN (R 4.3.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.3.2)
## pkgload 1.3.3 2023-09-22 [2] CRAN (R 4.3.2)
## plyr 1.8.9 2023-10-02 [2] CRAN (R 4.3.2)
## png 0.1-8 2022-11-29 [2] CRAN (R 4.3.2)
## prettyunits 1.2.0 2023-09-24 [2] CRAN (R 4.3.2)
## processx 3.8.2 2023-06-30 [2] CRAN (R 4.3.2)
## profvis 0.3.8 2023-05-02 [2] CRAN (R 4.3.2)
## promises 1.2.1 2023-08-10 [2] CRAN (R 4.3.2)
## ps 1.7.5 2023-04-18 [2] CRAN (R 4.3.2)
## purrr * 1.0.2 2023-08-10 [2] CRAN (R 4.3.2)
## R6 2.5.1 2021-08-19 [2] CRAN (R 4.3.2)
## RColorBrewer 1.1-3 2022-04-03 [2] CRAN (R 4.3.2)
## Rcpp * 1.0.11 2023-07-06 [2] CRAN (R 4.3.2)
## RcppParallel 5.1.7 2023-02-27 [2] CRAN (R 4.3.2)
## RCurl 1.98-1.13 2023-11-02 [2] CRAN (R 4.3.2)
## readr * 2.1.5 2024-01-10 [1] CRAN (R 4.3.2)
## remotes 2.4.2.1 2023-07-18 [2] CRAN (R 4.3.2)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.3.2)
## rhdf5 2.46.1 2023-11-29 [1] Bioconductor 3.18 (R 4.3.2)
## rhdf5filters 1.14.1 2023-11-06 [1] Bioconductor
## Rhdf5lib 1.24.2 2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
## rlang 1.1.2 2023-11-04 [2] CRAN (R 4.3.2)
## rmarkdown 2.25 2023-09-18 [2] CRAN (R 4.3.2)
## Rsamtools 2.18.0 2023-10-24 [2] Bioconductor
## rstudioapi 0.15.0 2023-07-07 [2] CRAN (R 4.3.2)
## S4Arrays 1.2.0 2023-10-24 [2] Bioconductor
## S4Vectors 0.40.1 2023-10-26 [2] Bioconductor
## sass 0.4.7 2023-07-15 [2] CRAN (R 4.3.2)
## scales 1.3.0 2023-11-28 [2] CRAN (R 4.3.2)
## sessioninfo 1.2.2 2021-12-06 [2] CRAN (R 4.3.2)
## shiny 1.7.5.1 2023-10-14 [2] CRAN (R 4.3.2)
## ShortRead 1.60.0 2023-10-24 [1] Bioconductor
## SparseArray 1.2.1 2023-11-05 [2] Bioconductor
## stringi 1.7.12 2023-01-11 [2] CRAN (R 4.3.2)
## stringr * 1.5.0 2022-12-02 [2] CRAN (R 4.3.2)
## SummarizedExperiment 1.32.0 2023-10-24 [2] Bioconductor
## survival 3.5-7 2023-08-14 [2] CRAN (R 4.3.2)
## tibble * 3.2.1 2023-03-20 [2] CRAN (R 4.3.2)
## tidyr * 1.3.0 2023-01-24 [2] CRAN (R 4.3.2)
## tidyselect 1.2.0 2022-10-10 [2] CRAN (R 4.3.2)
## tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.2)
## timechange 0.3.0 2024-01-18 [1] CRAN (R 4.3.2)
## tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.2)
## urlchecker 1.0.1 2021-11-30 [2] CRAN (R 4.3.2)
## usethis * 2.2.2 2023-07-06 [2] CRAN (R 4.3.2)
## utf8 1.2.4 2023-10-22 [2] CRAN (R 4.3.2)
## vctrs 0.6.4 2023-10-12 [2] CRAN (R 4.3.2)
## vegan * 2.6-4 2022-10-11 [1] CRAN (R 4.3.2)
## withr 2.5.2 2023-10-30 [2] CRAN (R 4.3.2)
## xfun 0.41 2023-11-01 [2] CRAN (R 4.3.2)
## xtable 1.8-4 2019-04-21 [2] CRAN (R 4.3.2)
## XVector 0.42.0 2023-10-24 [2] Bioconductor
## yaml 2.3.7 2023-01-23 [2] CRAN (R 4.3.2)
## zlibbioc 1.48.0 2023-10-24 [2] Bioconductor
##
## [1] /home/jmc753/R/x86_64-pc-linux-gnu-library/4.3
## [2] /programs/R-4.3.2/library
##
## ──────────────────────────────────────────────────────────────────────────────